[BUG] Fix nonzero_static crash on CUDA when the input is an empty tensor (#162578)

Fixes #162473

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162578
Approved by: https://github.com/ngimel
can-gaa-hou
2025-09-15 05:44:15 +00:00
committed by PyTorch MergeBot
parent 755cf90672
commit b3ad8f4a9c
2 changed files with 20 additions and 0 deletions


@@ -317,6 +317,17 @@ void nonzero_static_cuda_out_impl(
    out_temp =
        Tensor(at::detail::empty_cuda({self.dim(), size}, out.options())).t();
  }
  // If input has zero elements, avoid kernel grid calculations (which can
  // produce zero divisors) and just fill the output with fill_value.
  if (self.numel() == 0) {
    if (need_to_copy) {
      out_temp.fill_(fill_value);
      out.copy_(out_temp);
    } else {
      out.fill_(fill_value);
    }
    return;
  }
  int64_t* out_data_ptr = need_to_copy ? out_temp.mutable_data_ptr<int64_t>()
                                       : out.mutable_data_ptr<int64_t>();

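For reference, a minimal Python sketch (not part of the diff) of the behavior the new guard produces on an empty CUDA input, mirroring the test case below; it assumes a CUDA build and relies on nonzero_static's default fill_value of -1:

import torch

# An empty input has no nonzero indices, so every requested output row is
# padded with fill_value (-1 by default). Before this fix, the same call on
# CUDA hit a zero divisor in the kernel grid calculation (issue #162473).
x = torch.tensor([], device="cuda")
out = torch.nonzero_static(x, size=1)
print(out)        # expected: tensor([[-1]], device='cuda:0')
print(out.shape)  # expected: torch.Size([1, 1])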

@@ -1654,6 +1654,15 @@ class TestUnaryUfuncs(TestCase):
            ),
        )
        # empty input
        # https://github.com/pytorch/pytorch/issues/162473
        input_tensor = torch.tensor([], device=device)
        static_size = 1
        self.assertEqual(
            torch.nonzero_static(input_tensor, size=static_size),
            torch.tensor([[-1]], device=device),
        )
        # 1D input
        input_tensor = torch.tensor([0, 8], device=device)
        static_size = 1