Add runtime_overhead PR Time Benchmark (#163866)

This adds a PR time benchmark that measures runtime overhead on a very small graph, which will help track regressions in that overhead over time.
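Each variant compiles a trivial `a + 1` op with Inductor on a two-element CUDA tensor and collects instruction counts, so regressions in per-call dispatch and guard overhead show up even though the kernel itself does almost no work. The variants cover `requires_grad`, `inference_mode`, backward, and dynamic shapes.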

Example Results:
```
runtime_overhead_inductor,instruction_count,222645
runtime_overhead_inductor_inference_mode,instruction_count,234998
runtime_overhead_inductor_requires_grad,instruction_count,293556
runtime_overhead_inductor_requires_grad_backward,instruction_count,78181
runtime_overhead_inductor_dynamic,instruction_count,234870
runtime_overhead_inductor_inference_mode_dynamic,instruction_count,248711
runtime_overhead_inductor_requires_grad_dynamic,instruction_count,309979
runtime_overhead_inductor_requires_grad_backward_dynamic,instruction_count,77599
```
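Each line is `<benchmark name>,<metric>,<value>`: the name encodes which flags were enabled (see `name()` in the benchmark below), and the value is the collected `instruction_count` metric.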

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163866
Approved by: https://github.com/jansel, https://github.com/mlazos, https://github.com/anijain2305
Author: Arsh Zahed
Date: 2025-09-27 03:26:55 +00:00
Committed by: PyTorch MergeBot
Parent commit: 9dac6437da
Commit: 254d2864d6
2 changed files with 112 additions and 1 deletion

Changes to the `benchmarks/dynamo/pr_time_benchmarks` README (a typo fix in step 7):

```
@@ -6,4 +6,4 @@
 4. (Optional) flip a flag that you know will change the benchmark and run again with b.txt `PYTHONPATH=./ python benchmarks/[YOUR_BENCHMARK].py a.txt`
 5. Compare `a.txt` and `b.txt` located within the `benchmarks/dynamo/pr_time_benchmarks` folder to make sure things look as you expect
 6. Check in your new benchmark file and submit a new PR
-7. In a few days, if your benchmark is stable, bug Laith Sakka to enable running your benchmark on all PRs. If your a meta employee, you can find the dashboard here: internalfb.com/intern/unidash/dashboard/pt2_diff_time_metrics
+7. In a few days, if your benchmark is stable, bug Laith Sakka to enable running your benchmark on all PRs. If you are a meta employee, you can find the dashboard here: https://internalfb.com/intern/unidash/dashboard/pt2_diff_time_metrics
```
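Step 5 of the README is an eyeball comparison; below is a minimal sketch of automating it, assuming both files contain `name,metric,value` rows like the example output above (the `compare` helper and the 5% threshold are illustrative, not part of this PR):

```
import csv


def load(path):
    # Parse rows of the form: benchmark name, metric name, integer value.
    with open(path) as f:
        rows = [row for row in csv.reader(f) if row]
    return {(name, metric): int(value) for name, metric, value in rows}


def compare(baseline, candidate, threshold=0.05):
    a, b = load(baseline), load(candidate)
    # Report the relative change for every benchmark present in both files.
    for key in sorted(a.keys() & b.keys()):
        delta = (b[key] - a[key]) / a[key]
        flag = "  <-- above threshold" if abs(delta) > threshold else ""
        print(f"{key[0]},{key[1]}: {a[key]} -> {b[key]} ({delta:+.1%}){flag}")


compare("a.txt", "b.txt")
```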

The new benchmark file (111 added lines):

```
import sys

from benchmark_base import BenchmarkBase

import torch
from torch.autograd.grad_mode import inference_mode


class Benchmark(BenchmarkBase):
    def __init__(self, requires_grad, inference_mode, backward, dynamic):
        assert not (inference_mode and backward), (
            "inference_mode and backward cannot both be True"
        )
        self._requires_grad = requires_grad
        self._inference_mode = inference_mode
        self._backward = backward
        super().__init__(
            category="runtime_overhead",
            backend="inductor",
            device="cuda",
            dynamic=dynamic,
        )

    def name(self):
        # Builds names like runtime_overhead_inductor_requires_grad_dynamic,
        # matching the rows in the example results above.
        prefix = f"{self.category()}_{self.backend()}"
        if self._requires_grad:
            prefix += "_requires_grad"
        if self._inference_mode:
            prefix += "_inference_mode"
        if self._backward:
            prefix += "_backward"
        if self.is_dynamic():
            prefix += "_dynamic"
        return prefix

    def description(self):
        return "runtime of a compiled add1 op on a small input"

    def _prepare_once(self):
        torch._dynamo.reset()
        self.a = torch.ones(2, device=self.device(), requires_grad=self._requires_grad)

        @torch.compile(
            backend=self.backend(),
            fullgraph=True,
            dynamic=self.is_dynamic(),
        )
        def add1(a):
            return a + 1

        self._add1 = add1

        # Warm up so compilation time is not attributed to the measured runs;
        # only steady-state call overhead should be counted.
        for _ in range(10):
            if self._backward:
                self.forward_val = self._add1(self.a).sum()
                self.forward_val.backward()
            else:
                self._work()

    def _prepare(self):
        # The backward variants measure only .backward(), so produce a fresh
        # forward result outside the measured region.
        if self._backward:
            self.forward_val = self._add1(self.a).sum()

    def _work(self):
        if self._inference_mode:
            with inference_mode():
                self._add1(self.a)
        elif self._backward:
            self.forward_val.backward()
        else:
            self._add1(self.a)


def main():
    result_path = sys.argv[1]
    benchmarks = [
        Benchmark(
            requires_grad=False, inference_mode=False, backward=False, dynamic=False
        ),
        Benchmark(
            requires_grad=False, inference_mode=True, backward=False, dynamic=False
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=False, dynamic=False
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=True, dynamic=False
        ),
        Benchmark(
            requires_grad=False, inference_mode=False, backward=False, dynamic=True
        ),
        Benchmark(
            requires_grad=False, inference_mode=True, backward=False, dynamic=True
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=False, dynamic=True
        ),
        Benchmark(
            requires_grad=True, inference_mode=False, backward=True, dynamic=True
        ),
    ]

    for benchmark in benchmarks:
        benchmark.enable_instruction_count().collect_all().append_results(result_path)


if __name__ == "__main__":
    main()
```
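Per the README steps above, the benchmark takes a results file as its only argument and appends one `name,instruction_count,value` row per variant, e.g. `PYTHONPATH=./ python benchmarks/dynamo/pr_time_benchmarks/benchmarks/runtime_overhead.py a.txt` (the `runtime_overhead.py` filename is assumed here; the diff view does not show the new file's path).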