Add runtime_overhead PR Time Benchmark (#163866)

This adds a PR time benchmark that measures runtime overhead on a very small graph, which will help track regressions in that overhead over time.
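Each variant compiles a trivial `a + 1` op with Inductor on a two-element CUDA tensor and collects instruction counts, so regressions in per-call dispatch and guard overhead show up even though the kernel itself does almost no work. The variants cover `requires_grad`, `inference_mode`, backward, and dynamic shapes.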

Example Results:
```
runtime_overhead_inductor,instruction_count,222645
runtime_overhead_inductor_inference_mode,instruction_count,234998
runtime_overhead_inductor_requires_grad,instruction_count,293556
runtime_overhead_inductor_requires_grad_backward,instruction_count,78181
runtime_overhead_inductor_dynamic,instruction_count,234870
runtime_overhead_inductor_inference_mode_dynamic,instruction_count,248711
runtime_overhead_inductor_requires_grad_dynamic,instruction_count,309979
runtime_overhead_inductor_requires_grad_backward_dynamic,instruction_count,77599
```
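Each line is `<benchmark name>,<metric>,<value>`: the name encodes which flags were enabled (see `name()` in the benchmark below), and the value is the collected `instruction_count` metric.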

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163866
Approved by: https://github.com/jansel, https://github.com/mlazos, https://github.com/anijain2305
Author: Arsh Zahed
Date: 2025-09-27 03:26:55 +00:00
Committed by: PyTorch MergeBot
Parent commit: 9dac6437da
Commit: 254d2864d6
2 changed files with 112 additions and 1 deletion

Changes to the `benchmarks/dynamo/pr_time_benchmarks` README (a typo fix in step 7):

```
@@ -6,4 +6,4 @@
 4. (Optional) flip a flag that you know will change the benchmark and run again with b.txt `PYTHONPATH=./ python benchmarks/[YOUR_BENCHMARK].py a.txt`
 5. Compare `a.txt` and `b.txt` located within the `benchmarks/dynamo/pr_time_benchmarks` folder to make sure things look as you expect
 6. Check in your new benchmark file and submit a new PR
-7. In a few days, if your benchmark is stable, bug Laith Sakka to enable running your benchmark on all PRs. If your a meta employee, you can find the dashboard here: internalfb.com/intern/unidash/dashboard/pt2_diff_time_metrics
+7. In a few days, if your benchmark is stable, bug Laith Sakka to enable running your benchmark on all PRs. If you are a meta employee, you can find the dashboard here: https://internalfb.com/intern/unidash/dashboard/pt2_diff_time_metrics
```
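Step 5 of the README is an eyeball comparison; below is a minimal sketch of automating it, assuming both files contain `name,metric,value` rows like the example output above (the `compare` helper and the 5% threshold are illustrative, not part of this PR):

```
import csv


def load(path):
    # Parse rows of the form: benchmark name, metric name, integer value.
    with open(path) as f:
        rows = [row for row in csv.reader(f) if row]
    return {(name, metric): int(value) for name, metric, value in rows}


def compare(baseline, candidate, threshold=0.05):
    a, b = load(baseline), load(candidate)
    # Report the relative change for every benchmark present in both files.
    for key in sorted(a.keys() & b.keys()):
        delta = (b[key] - a[key]) / a[key]
        flag = "  <-- above threshold" if abs(delta) > threshold else ""
        print(f"{key[0]},{key[1]}: {a[key]} -> {b[key]} ({delta:+.1%}){flag}")


compare("a.txt", "b.txt")
```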

The new benchmark file (111 added lines):

```
import sys

from benchmark_base import BenchmarkBase

import torch
from torch.autograd.grad_mode import inference_mode


class Benchmark(BenchmarkBase):
    def __init__(self, requires_grad, inference_mode, backward, dynamic):
        assert not (inference_mode and backward), (
            "inference_mode and backward cannot both be True"
        )
        self._requires_grad = requires_grad
        self._inference_mode = inference_mode
        self._backward = backward
        super().__init__(
            category="runtime_overhead",
            backend="inductor",
            device="cuda",
            dynamic=dynamic,
        )

    def name(self):
        # Builds names like runtime_overhead_inductor_requires_grad_dynamic,
        # matching the rows in the example results above.
        prefix = f"{self.category()}_{self.backend()}"
        if self._requires_grad:
            prefix += "_requires_grad"
        if self._inference_mode:
            prefix += "_inference_mode"
        if self._backward:
            prefix += "_backward"
        if self.is_dynamic():
            prefix += "_dynamic"
        return prefix

    def description(self):
        return "runtime of a compiled add1 op on a small input"

    def _prepare_once(self):
        torch._dynamo.reset()
        self.a = torch.ones(2, device=self.device(), requires_grad=self._requires_grad)

        @torch.compile(
            backend=self.backend(),
            fullgraph=True,
            dynamic=self.is_dynamic(),
        )
        def add1(a):
            return a + 1

        self._add1 = add1

        # Warm up so compilation time is not attributed to the measured runs;
        # only steady-state call overhead should be counted.
        for _ in range(10):
            if self._backward:
                self.forward_val = self._add1(self.a).sum()
                self.forward_val.backward()
            else:
                self._work()

    def _prepare(self):
        # The backward variants measure only .backward(), so produce a fresh
        # forward result outside the measured region.
        if self._backward:
            self.forward_val = self._add1(self.a).sum()

    def _work(self):
        if self._inference_mode:
            with inference_mode():
                self._add1(self.a)
        elif self._backward:
            self.forward_val.backward()
        else:
            self._add1(self.a)


def main():
    result_path = sys.argv[1]
    benchmarks = [
        Benchmark(
            requires_grad=False, inference_mode=False, backward=False, dynamic=False
        ),
        Benchmark(
            requires_grad=False, inference_mode=True, backward=False, dynamic=False
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=False, dynamic=False
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=True, dynamic=False
        ),
        Benchmark(
            requires_grad=False, inference_mode=False, backward=False, dynamic=True
        ),
        Benchmark(
            requires_grad=False, inference_mode=True, backward=False, dynamic=True
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=False, dynamic=True
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=True, dynamic=True
        ),
    ]

    for benchmark in benchmarks:
        benchmark.enable_instruction_count().collect_all().append_results(result_path)


if __name__ == "__main__":
    main()
```
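Per the README steps above, the benchmark takes a results file as its only argument and appends one `name,instruction_count,value` row per variant, e.g. `PYTHONPATH=./ python benchmarks/dynamo/pr_time_benchmarks/benchmarks/runtime_overhead.py a.txt` (the `runtime_overhead.py` filename is assumed here; the diff view does not show the new file's path).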