Add fp16-overflow regression test (#162401)
Discovered while debugging https://github.com/pytorch/pytorch/issues/160841, where sdpa returned NaNs because intermediate values were cast back to fp16 before normalization during the computation; this was fixed by https://github.com/pytorch/pytorch/pull/161999.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162401
Approved by: https://github.com/Skylion007, https://github.com/drisspg
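For context, a minimal standalone sketch of the failure mode (not the SDPA kernel itself), assuming the unnormalized attention scores are rounded to fp16 before the row-wise normalization: with query/key values of 64.0 and a head dimension of 80, each raw score is 64 * 64 * 80 = 327680, which exceeds the fp16 maximum of 65504, so the cast produces inf and inf / inf then yields NaN. The shapes and values below are taken from the new test.

import torch

# Hypothetical illustration only, assuming scores are cast to half before normalization.
q = torch.full((23, 80), 64.0, dtype=torch.float32)
scores_fp32 = q @ q.T                        # 64 * 64 * 80 = 327680, finite in fp32
scores_fp16 = scores_fp32.to(torch.half)     # 327680 > 65504 (fp16 max), overflows to inf
probs = scores_fp16 / scores_fp16.sum(-1, keepdim=True)  # inf / inf -> nan
print(scores_fp16.max())      # tensor(inf, dtype=torch.float16)
print(probs.isnan().any())    # tensor(True)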
Committed by: PyTorch MergeBot
Parent: 26a1b9cce2
Commit: 015423bef8
@@ -2070,6 +2070,11 @@ class TestSDPA(NNTestCase):
         sdp_math = torch.nn.functional.scaled_dot_product_attention(x, x, x, scale=-1.0 / 0.0001)
         self.assertEqual(ref_result, sdp_math)
 
+    def test_scaled_dot_product_attention_fp16_overflow(self, device):
+        # Regression test for https://github.com/pytorch/pytorch/issues/160841
+        x = torch.full((1, 32, 23, 80), 64.0, dtype=torch.half, device=device)
+        y = torch.nn.functional.scaled_dot_product_attention(x, x, x)
+        self.assertFalse(y.isnan().any().item())
 
 class TestSDPACpuOnly(NNTestCase):
     """ Used to test CPU only functionality of scaled_dot_product_attention """
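To exercise the new regression test locally, one option is to run the SDPA test file with a name filter; the TestSDPA class most likely lives in test/test_transformers.py, though the exact path is an assumption here:

pytest test/test_transformers.py -k test_scaled_dot_product_attention_fp16_overflow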