From c917c63282c467ef942c99da3ce4fa57bceba603 Mon Sep 17 00:00:00 2001
From: "Nichols A. Romero" <nick.romero@amd.com>
Date: Tue, 22 Jul 2025 19:45:35 +0000
Subject: [PATCH] [ROCm][tunableop] UT tolerance increase for
 matmul_small_brute_force_tunableop at FP16 (#158788)

TunableOp will sometimes find a less precise solution due to the small input vectors used in this UT. Bumping op tolerance to eliminate flakiness.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/158788
Approved by: https://github.com/jeffdaily
---
 test/test_linalg.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_linalg.py b/test/test_linalg.py
index 8712d65bb493..f49db43b4ff2 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -4762,6 +4762,7 @@ class TestLinalg(TestCase):
     @onlyCUDA
     @skipCUDAIfNotRocm  # Skipping due to SM89 OOM in CI, UT doesn't do much on NV anyways
     @dtypes(*floating_types_and(torch.half))
+    @precisionOverride({torch.float16: 1e-1})  # TunableOp may occasionally find less precise solution
     def test_matmul_small_brute_force_tunableop(self, device, dtype):
         # disable tunableop buffer rotation for all tests everywhere, it can be slow
         # We set the TunableOp numerical check environment variable here because it is