mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[CI][MPS] Fix compile benchmark correctness (#159731)
By passing the `fullgraph=True` attribute and increasing the cache size limit to 2**16. Otherwise, the compiler might decide to fall back to eager to avoid recompilations. Pull Request resolved: https://github.com/pytorch/pytorch/pull/159731 Approved by: https://github.com/dcci
This commit is contained in:
committed by
PyTorch MergeBot
parent
e136a9175b
commit
fecdebe385
@@ -90,7 +90,7 @@ def bench_reduction(
         return reduction_func(t, dim=0)

    f.__name__ = reduction_func.__name__
-    f_c = torch.compile(f, dynamic=False)
+    f_c = torch.compile(f, dynamic=False, fullgraph=True)

    for size in (512, 1024, 2048, 4096):
        x = torch.testing.make_tensor(size, size, device=device, dtype=dtype)
@@ -116,7 +116,7 @@ def bench_scan(
    def f(t):
        return scan_func(t, dim=dim)

-    f_c = torch.compile(f, dynamic=False)
+    f_c = torch.compile(f, dynamic=False, fullgraph=True)

    for size in (32, 128, 512, 1024):
        f.__name__ = f"{scan_func.__name__}-dim{dim}-{size}x{size}"
@@ -135,7 +135,7 @@ def bench_scan(
    def f_1d(t):
        return scan_func(t, dim=0)

-    f_1d_c = torch.compile(f_1d, dynamic=False)
+    f_1d_c = torch.compile(f_1d, dynamic=False, fullgraph=True)

    for size in (100, 10000, 1000000):
        f_1d.__name__ = f"{scan_func.__name__}-1d-{size}"
@@ -204,4 +204,5 @@ def main() -> None:


 if __name__ == "__main__":
+    torch._dynamo.config.cache_size_limit = 2**16
     main()
Reference in New Issue
Block a user