Add fp16-overflow regression test (#162401)

Discovered while debugging https://github.com/pytorch/pytorch/issues/160841, where sdpa returned NaNs because intermediate values were cast back to fp16 before normalization during the computation. Fixed by https://github.com/pytorch/pytorch/pull/161999.
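
For context, a minimal sketch of that failure mode using plain tensor ops (an illustration under the assumptions stated in the comments, not the actual sdpa kernel code): with the repro input used below, each unscaled Q.K dot product is 64 * 64 * 80 = 327680, which exceeds the fp16 maximum of roughly 65504, so casting it back to half overflows to inf, and the max-subtraction step of a softmax then turns inf into NaN.

import torch

# Illustration only: plain tensor ops standing in for the sdpa math backend.
# Each unscaled Q.K dot product for the repro input is 64 * 64 * 80 = 327680,
# which is above the fp16 maximum (~65504), so casting it to half overflows:
score = torch.tensor(64.0 * 64.0 * 80.0)   # 327680.0, representable in fp32
print(score.half())                        # tensor(inf, dtype=torch.float16)

# Once a row of scores is inf, the max-subtraction step of softmax computes
# inf - inf = nan, and the NaNs propagate to the attention output:
row = torch.full((23,), float("inf"), dtype=torch.half)
print(row - row.max())                     # all NaN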
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162401
Approved by: https://github.com/Skylion007, https://github.com/drisspg
Author: Nikita Shulga
Date: 2025-09-08 10:44:04 -07:00
Committed by: PyTorch MergeBot
Parent: 26a1b9cce2
Commit: 015423bef8


@@ -2070,6 +2070,11 @@ class TestSDPA(NNTestCase):
         sdp_math = torch.nn.functional.scaled_dot_product_attention(x, x, x, scale=-1.0 / 0.0001)
         self.assertEqual(ref_result, sdp_math)
 
+    def test_scaled_dot_product_attention_fp16_overflow(self, device):
+        # Regression test for https://github.com/pytorch/pytorch/issues/160841
+        x = torch.full((1, 32, 23, 80), 64.0, dtype=torch.half, device=device)
+        y = torch.nn.functional.scaled_dot_product_attention(x, x, x)
+        self.assertFalse(y.isnan().any().item())
 
 class TestSDPACpuOnly(NNTestCase):
     """ Used to test CPU only functionality of scaled_dot_product_attention """