Tentative fix for CUDA-10.2 Windows build failures (#76204)

Summary:
`C10_UNUSED` somehow triggers a segfault in some versions of NVCC, which looks something like the following:
```
caffe2\caffe2\operators\piecewise_linear_transform_op.h(65): internal error: assertion failed: gen_variable_decl: declared_type is NULL (cp_gen_be.c, line 22209 in gen_variable_decl)

1 catastrophic error detected in the compilation of "caffe2/caffe2/operators/piecewise_linear_transform_op.cu".
Compilation aborted.
nvcc error   : 'cudafe++' died with status 0xC0000409

```
Fixes a regression introduced by https://github.com/pytorch/pytorch/pull/75538 / D35747333 (f6c275f55d)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/76204

Test Plan: CI

Reviewed By: EscapeZero, atalman

Differential Revision: D35831451

fbshipit-source-id: f744d4688c9fd324f8f54b27781a3def97778d1e
(cherry picked from commit fd64655aa4b6890d205273ec76b416ee3b524783)
This commit is contained in:
Nikita Shulga
2022-04-22 08:08:20 -07:00
committed by PyTorch MergeBot
parent 4b311a9633
commit ecd5567980
2 changed files with 6 additions and 3 deletions

View File

@ -155,7 +155,8 @@ bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
col_buffer->Resize(buffer_shape);
T* col_buffer_data = col_buffer->template mutable_data<T>();
// Im2col, followed by gemm.
for (C10_UNUSED const auto image_id : c10::irange(N)) {
for (const auto image_id : c10::irange(N)) {
(void)image_id; // CUDA-10.2 on Windows crashes when C10_UNUSED macro is used
for (const auto group_id : c10::irange(group_)) {
DeformableIm2col(
Xdata + group_id * input_offset,
@ -342,7 +343,8 @@ bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
math::Set<T, Context>(dX->numel(), 0, dXdata, &context_);
}
for (C10_UNUSED const auto image_id : c10::irange(N)) {
for (const auto image_id : c10::irange(N)) {
(void)image_id; // CUDA-10.2 on Windows crashes when C10_UNUSED macro is used
for (const auto group_id : c10::irange(group_)) {
math::Gemm<T, Context>(
CblasTrans,

View File

@ -62,7 +62,8 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
const int64_t num_bounds_per_group,
const int64_t num_group) {
const T* start = bounds;
for (C10_UNUSED const auto i : c10::irange(num_group)) {
for (const auto i : c10::irange(num_group)) {
(void)i; // CUDA-10.2 on Windows crashes when C10_UNUSED macro is used
if (!std::is_sorted(start, start + num_bounds_per_group)) {
return false;
}