From c917c63282c467ef942c99da3ce4fa57bceba603 Mon Sep 17 00:00:00 2001
From: "Nichols A. Romero"
Date: Tue, 22 Jul 2025 19:45:35 +0000
Subject: [PATCH] [ROCm][tunableop] UT tolerance increase for matmul_small_brute_force_tunableop at FP16 (#158788)

TunableOp will sometimes find a less precise solution due to the small
input vectors used in this UT. Bumping op tolerance to eliminate flakiness.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/158788
Approved by: https://github.com/jeffdaily
---
 test/test_linalg.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_linalg.py b/test/test_linalg.py
index 8712d65bb493..f49db43b4ff2 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -4762,6 +4762,7 @@ class TestLinalg(TestCase):
     @onlyCUDA
     @skipCUDAIfNotRocm # Skipping due to SM89 OOM in CI, UT doesn't do much on NV anyways
     @dtypes(*floating_types_and(torch.half))
+    @precisionOverride({torch.float16: 1e-1}) # TunableOp may occasionally find less precise solution
     def test_matmul_small_brute_force_tunableop(self, device, dtype):
         # disable tunableop buffer rotation for all tests everywhere, it can be slow
         # We set the TunableOp numerical check environment variable here because it is