Add fp16-overflow regression test (#162401)

Discovered while debugging https://github.com/pytorch/pytorch/issues/160841, where sdpa returned NaNs because intermediate values were cast back to fp16 before normalization during the computation. Fixed by https://github.com/pytorch/pytorch/pull/161999.
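
For context, a minimal sketch of that failure mode using plain tensor ops (an illustration under the assumptions stated in the comments, not the actual sdpa kernel code): with the repro input used below, each unscaled Q.K dot product is 64 * 64 * 80 = 327680, which exceeds the fp16 maximum of roughly 65504, so casting it back to half overflows to inf, and the max-subtraction step of a softmax then turns inf into NaN.

import torch

# Illustration only: plain tensor ops standing in for the sdpa math backend.
# Each unscaled Q.K dot product for the repro input is 64 * 64 * 80 = 327680,
# which is above the fp16 maximum (~65504), so casting it to half overflows:
score = torch.tensor(64.0 * 64.0 * 80.0)   # 327680.0, representable in fp32
print(score.half())                        # tensor(inf, dtype=torch.float16)

# Once a row of scores is inf, the max-subtraction step of softmax computes
# inf - inf = nan, and the NaNs propagate to the attention output:
row = torch.full((23,), float("inf"), dtype=torch.half)
print(row - row.max())                     # all NaN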
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162401
Approved by: https://github.com/Skylion007, https://github.com/drisspg
Author: Nikita Shulga
Date: 2025-09-08 10:44:04 -07:00
Committed by: PyTorch MergeBot
Parent: 26a1b9cce2
Commit: 015423bef8


@@ -2070,6 +2070,11 @@ class TestSDPA(NNTestCase):
         sdp_math = torch.nn.functional.scaled_dot_product_attention(x, x, x, scale=-1.0 / 0.0001)
         self.assertEqual(ref_result, sdp_math)
 
+    def test_scaled_dot_product_attention_fp16_overflow(self, device):
+        # Regression test for https://github.com/pytorch/pytorch/issues/160841
+        x = torch.full((1, 32, 23, 80), 64.0, dtype=torch.half, device=device)
+        y = torch.nn.functional.scaled_dot_product_attention(x, x, x)
+        self.assertFalse(y.isnan().any().item())
 
 class TestSDPACpuOnly(NNTestCase):
     """ Used to test CPU only functionality of scaled_dot_product_attention """