[ci] disable cudagraph for tts_angular on dashboard (#148221)
tts_angular with cudagraphs is flaky: its speedup varies from 0.05x to 1.01x. This PR disables cudagraphs for tts_angular to avoid the noise. Since tts_angular shows ~1x speedup while other torchbench models show ~2x speedup, skipping tts_angular entirely would wrongly bump the aggregate cudagraph speedup, so this PR only disables cudagraphs for tts_angular instead of skipping the model. [Dashboard](https://github.com/pytorch/pytorch/actions/runs/13597394087)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/148221
Approved by: https://github.com/eellison
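To see why skipping the model would inflate the reported number while disabling cudagraphs for it does not, here is a rough sketch of how a dashboard-style geometric-mean speedup reacts when a ~1x model is dropped from the pool. The per-model numbers below are illustrative stand-ins, not real dashboard data:

```python
# Illustrative only: hypothetical per-model cudagraph speedups, not real dashboard data.
from statistics import geometric_mean

speedups = {"model_a": 2.1, "model_b": 1.9, "tts_angular": 1.0}

# Keeping tts_angular in the pool (with cudagraphs disabled, its entry stays ~1x).
print(geometric_mean(list(speedups.values())))  # ~1.59

# Skipping tts_angular removes the ~1x entry and inflates the aggregate.
print(geometric_mean([v for k, v in speedups.items() if k != "tts_angular"]))  # ~2.0
```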
Committed by: PyTorch MergeBot
Parent: de7af81f18
Commit: 6e10471966
@@ -1824,6 +1824,10 @@ class BenchmarkRunner:
     def skip_models_due_to_control_flow(self):
         return set()
 
+    @property
+    def disable_cudagraph_models(self):
+        return set()
+
     @property
     def guard_on_nn_module_models(self):
         return set()
@@ -3834,6 +3838,9 @@ def run(runner, args, original_dir=None):
         experiment = coverage_experiment
         output_filename = "coverage.csv"
 
+    if args.only in runner.disable_cudagraph_models:
+        args.disable_cudagraphs = True
+
     if args.inductor or args.backend == "inductor" or args.export_aot_inductor:
         inductor_config.triton.cudagraphs = not args.disable_cudagraphs
         inductor_config.triton.persistent_reductions = (
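The hunk above is where the per-model setting becomes an inductor config change: when the single benchmarked model (`args.only`) is in the runner's `disable_cudagraph_models` set, `args.disable_cudagraphs` is forced on, and the existing inductor branch then turns that into `inductor_config.triton.cudagraphs = False`. A minimal self-contained sketch of that flow, using stand-in objects rather than the real `BenchmarkRunner` and `torch._inductor.config`, might look like:

```python
# Simplified sketch of the control flow in run(); FakeRunner and inductor_config
# below are stand-ins, not the real benchmark runner or torch._inductor.config.
from argparse import Namespace
from types import SimpleNamespace


class FakeRunner:
    @property
    def disable_cudagraph_models(self):
        # What the torchbench runner would report after this PR.
        return {"tts_angular"}


inductor_config = SimpleNamespace(triton=SimpleNamespace(cudagraphs=True))


def configure(runner, args):
    # Same ordering as the diff: flip the per-run flag first, then apply it.
    if args.only in runner.disable_cudagraph_models:
        args.disable_cudagraphs = True
    if args.inductor:
        inductor_config.triton.cudagraphs = not args.disable_cudagraphs


args = Namespace(only="tts_angular", disable_cudagraphs=False, inductor=True)
configure(FakeRunner(), args)
print(inductor_config.triton.cudagraphs)  # False: cudagraphs off for this model only
```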
@@ -145,6 +145,10 @@ class TorchBenchmarkRunner(BenchmarkRunner):
     def skip_models_for_freezing_cuda(self):
         return self._skip["freezing"]["cuda"]
 
+    @property
+    def disable_cudagraph_models(self):
+        return self._config["disable_cudagraph"]
+
     @property
     def skip_models_for_freezing_cpu(self):
         return self._skip["freezing"]["cpu"]
@@ -111,6 +111,12 @@ non_deterministic:
   - sam_fast
 
 
+disable_cudagraph:
+  # tts_angular is flaky with cudagraphs. Its speedup
+  # oscillates from .05 to 1.05
+  - tts_angular
+
+
 dtype:
   force_amp_for_fp16_bf16_models:
     - DALLE2_pytorch
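The new `disable_cudagraph` YAML section is what `TorchBenchmarkRunner.disable_cudagraph_models` reads back via `self._config["disable_cudagraph"]`. How `self._config` gets populated is not shown in this diff; assuming the runner parses the torchbench YAML with PyYAML into a plain dict, the lookup would behave roughly like this sketch:

```python
# Hedged sketch: assumes the config YAML is parsed with PyYAML into a dict;
# the actual loading code in TorchBenchmarkRunner is not part of this diff.
import yaml

yaml_text = """
disable_cudagraph:
  # tts_angular is flaky with cudagraphs. Its speedup
  # oscillates from .05 to 1.05
  - tts_angular
"""

config = yaml.safe_load(yaml_text)
disable_cudagraph_models = set(config.get("disable_cudagraph", []))
print("tts_angular" in disable_cudagraph_models)  # True
```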