[CUDA][cuBLAS][FP8] Forward-fix #162022 (#163354)

@ngimel is right, `ciflow/h100` doesn't actually appear to test the PR :(

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163354
Approved by: https://github.com/ngimel, https://github.com/Skylion007
Author:    eqy
Date:      2025-09-21 00:55:09 +00:00
Committer: PyTorch MergeBot
Parent:    8e3fd3d4f9
Commit:    e37b600007


@@ -1550,7 +1550,7 @@ class TestFP8Matmul(TestCase):
         # only cuBLAS supports rowwise with fp32 output and cuBLAS only supports
         # rowwise on SM 9.0
-        if torch.cuda.get_device_capability != (9, 0) and output_dtype == torch.float:
+        if torch.cuda.get_device_capability() != (9, 0) and output_dtype == torch.float:
             with self.assertRaisesRegex(
                 RuntimeError,
                 "Only bf16 high precision output types are supported for row-wise scaling."