# Owner(s): ["module: inductor"]

import unittest

import torch
from torch._dynamo.utils import counters
from torch._inductor.runtime.benchmarking import Benchmarker, TritonBenchmarker
from torch._inductor.test_case import run_tests, TestCase
from torch.testing._internal.common_utils import (
    decorateIf,
    instantiate_parametrized_tests,
    parametrize,
)
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU


ALL_BENCHMARKER_CLASSES = (
    Benchmarker,
    TritonBenchmarker,
)


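# Each wrapped benchmarking call increments a dynamo counter of the form
# "benchmarking.<benchmarker class>.<function name>" (see get_counter_value
# below); the tests assert on these counters to verify which code path ran,
# rather than asserting on wall-clock timings.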
@instantiate_parametrized_tests
class TestBenchmarker(TestCase):
    def setUp(self):
        super().setUp()
        torch.manual_seed(12345)
        counters.clear()

    @staticmethod
    def get_counter_value(benchmarker_cls, fn_name):
        return counters["inductor"][
            f"benchmarking.{benchmarker_cls.__name__}.{fn_name}"
        ]

    @staticmethod
    def make_params(device, size=100):
        fn, fn_args, fn_kwargs = torch.sum, (torch.randn(size, device=device),), {}
        _callable = lambda: fn(*fn_args, **fn_kwargs)  # noqa: E731
        return (fn, fn_args, fn_kwargs), _callable

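    # The base Benchmarker is expected to fail on the GPU path (hence the
    # conditional expectedFailure), presumably because only TritonBenchmarker
    # implements GPU benchmarking.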
    @unittest.skipIf(not HAS_CPU or not HAS_GPU, "requires CPU and GPU")
    @decorateIf(
        unittest.expectedFailure,
        lambda params: params["benchmarker_cls"] is Benchmarker
        and params["device"] == GPU_TYPE,
    )
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    @parametrize("device", (GPU_TYPE, "cpu"))
    def test_benchmark_smoke(self, benchmarker_cls, device):
        benchmarker = benchmarker_cls()
        (fn, fn_args, fn_kwargs), _ = self.make_params(device)
        timing = benchmarker.benchmark(fn, fn_args, fn_kwargs)
        self.assertGreater(timing, 0)
        self.assertEqual(self.get_counter_value(benchmarker_cls, "benchmark"), 1)
        self.assertEqual(
            self.get_counter_value(
                benchmarker_cls, "benchmark_cpu" if device == "cpu" else "benchmark_gpu"
            ),
            1,
        )

    @unittest.skipIf(not HAS_CPU, "requires CPU")
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_cpu_smoke(self, benchmarker_cls, device="cpu"):
        benchmarker = benchmarker_cls()
        _, _callable = self.make_params(device)
        timing = benchmarker.benchmark_cpu(_callable)
        self.assertGreater(timing, 0)
        self.assertEqual(self.get_counter_value(benchmarker_cls, "benchmark_cpu"), 1)

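    # As above, the base Benchmarker is expected to fail when benchmarking
    # directly on the GPU.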
    @unittest.skipIf(not HAS_GPU, "requires GPU")
    @decorateIf(
        unittest.expectedFailure,
        lambda params: params["benchmarker_cls"] is Benchmarker,
    )
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_gpu_smoke(self, benchmarker_cls, device=GPU_TYPE):
        benchmarker = benchmarker_cls()
        _, _callable = self.make_params(device)
        timing = benchmarker.benchmark_gpu(_callable)
        self.assertGreater(timing, 0)
        self.assertEqual(self.get_counter_value(benchmarker_cls, "benchmark_gpu"), 1)

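    # The device-inference tests below pass arguments whose devices are either
    # absent or mixed, and expect the benchmark() call to fail (expectedFailure)
    # rather than silently picking a device.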
    @unittest.skipIf(not HAS_CPU and not HAS_GPU, "requires CPU or GPU")
    @unittest.expectedFailure
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_safely_infers_device_no_devices(
        self, benchmarker_cls, device="cpu" if HAS_CPU else GPU_TYPE
    ):
        benchmarker = benchmarker_cls()
        (fn, _, _), _ = self.make_params(device)
        benchmarker.benchmark(fn, (), {})

    @unittest.skipIf(not HAS_CPU or not HAS_GPU, "requires CPU and GPU")
    @unittest.expectedFailure
    @parametrize("benchmarker_cls", ALL_BENCHMARKER_CLASSES)
    def test_benchmark_safely_infers_device_many_devices(self, benchmarker_cls):
        benchmarker = benchmarker_cls()
        (fn, cpu_args, cpu_kwargs), _ = self.make_params("cpu")
        (_, gpu_args, gpu_kwargs), _ = self.make_params(GPU_TYPE)
        many_devices_args = cpu_args + gpu_args
        many_devices_kwargs = cpu_kwargs
        many_devices_kwargs.update(gpu_kwargs)
        benchmarker.benchmark(fn, many_devices_args, many_devices_kwargs)


if __name__ == "__main__":
    run_tests()
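
# A minimal standalone sketch of the API exercised above, assuming a GPU is
# available (the benchmarker classes and the torch.sum workload mirror
# make_params):
#
#     benchmarker = TritonBenchmarker()
#     t = torch.randn(100, device=GPU_TYPE)
#     timing = benchmarker.benchmark(torch.sum, (t,), {})
#     assert timing > 0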