[CI][MPS] Fix compile benchmark correctness (#159731)

By passing the `fullgraph=True` keyword argument and increasing the cache size limit to 2**16

Otherwise, the compiler might decide to fall back to eager to avoid recompilations
Pull Request resolved: https://github.com/pytorch/pytorch/pull/159731
Approved by: https://github.com/dcci
This commit is contained in:
Nikita Shulga
2025-08-03 12:23:21 -07:00
committed by PyTorch MergeBot
parent e136a9175b
commit fecdebe385

View File

@ -90,7 +90,7 @@ def bench_reduction(
return reduction_func(t, dim=0)
f.__name__ = reduction_func.__name__
f_c = torch.compile(f, dynamic=False)
f_c = torch.compile(f, dynamic=False, fullgraph=True)
for size in (512, 1024, 2048, 4096):
x = torch.testing.make_tensor(size, size, device=device, dtype=dtype)
@ -116,7 +116,7 @@ def bench_scan(
def f(t):
return scan_func(t, dim=dim)
f_c = torch.compile(f, dynamic=False)
f_c = torch.compile(f, dynamic=False, fullgraph=True)
for size in (32, 128, 512, 1024):
f.__name__ = f"{scan_func.__name__}-dim{dim}-{size}x{size}"
@ -135,7 +135,7 @@ def bench_scan(
def f_1d(t):
return scan_func(t, dim=0)
f_1d_c = torch.compile(f_1d, dynamic=False)
f_1d_c = torch.compile(f_1d, dynamic=False, fullgraph=True)
for size in (100, 10000, 1000000):
f_1d.__name__ = f"{scan_func.__name__}-1d-{size}"
@ -204,4 +204,5 @@ def main() -> None:
if __name__ == "__main__":
torch._dynamo.config.cache_size_limit = 2**16
main()