Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-21 05:34:18 +08:00)
See https://github.com/pytorch/pytorch/pull/129751#issue-2380881501. Most changes are auto-generated by the linter. You can review these PRs via:

```bash
git diff --ignore-all-space --ignore-blank-lines HEAD~1
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129754
Approved by: https://github.com/ezyang
134 lines
3.7 KiB
Python
import operator_benchmark as op_bench

import torch
import torch.nn.functional as F


"""Microbenchmarks for batchnorm operator."""

# Benchmark cudnn if available
if torch.backends.cudnn.is_available():

    def cudnn_benchmark_configs(configs):
        result = []
        for config in configs:
            is_cuda = any("cuda" in attr.values() for attr in config)
            if is_cuda:
                result.append((*config, dict(cudnn=True)))
            result.append((*config, dict(cudnn=False)))
        return result

else:

    def cudnn_benchmark_configs(configs):
        return [(*config, dict(cudnn=False)) for config in configs]
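
# Each config produced by op_bench is a sequence of attribute dicts, e.g.
# ({"M": 1}, {"device": "cuda"}, {"training": True}). The helper above appends
# a cudnn flag to every config; CUDA configs are emitted twice so that both
# the cuDNN and the native kernels get measured:
#   ({"device": "cuda"},) -> ({"device": "cuda"}, {"cudnn": True})
#                            ({"device": "cuda"}, {"cudnn": False})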

batchnorm_configs_short = cudnn_benchmark_configs(
    op_bench.config_list(
        attr_names=["M", "N", "K"],
        attrs=[
            [1, 256, 3136],
        ],
        cross_product_configs={
            "device": ["cpu", "cuda"],
            "training": [True, False],
        },
        tags=["short"],
    )
)

batchnorm_configs_long = cudnn_benchmark_configs(
    op_bench.cross_product_configs(
        M=[2, 128],
        N=[8192, 2048],
        K=[1],
        device=["cpu", "cuda"],
        training=[True, False],
        tags=["long"],
    )
)
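
# The short set crosses one (M, N, K) shape with device x training (4 base
# configs); the long set crosses M x N x K x device x training (16 base
# configs). cudnn_benchmark_configs then doubles every CUDA entry.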

class BatchNormBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device, training, cudnn):
        self.inputs = {
            "input_one": torch.rand(
                M, N, K, device=device, requires_grad=self.auto_set()
            ),
            "mean": torch.rand(N, device=device),
            "var": torch.rand(N, device=device),
            "weight": torch.rand(N, device=device),
            "bias": torch.rand(N, device=device),
            "training": training,
            "cudnn": cudnn,
        }
        self.set_module_name("batchnorm")

    def forward(self, input_one, mean, var, weight, bias, training, cudnn):
        with torch.backends.cudnn.flags(enabled=cudnn):
            return F.batch_norm(input_one, mean, var, weight, bias, training)


op_bench.generate_pt_test(
    batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark
)
op_bench.generate_pt_gradient_test(
    batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark
)
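
# forward() toggles cuDNN per config via the torch.backends.cudnn.flags
# context manager. With training=True, F.batch_norm normalizes with batch
# statistics and updates the supplied running mean/var tensors in place;
# with training=False it uses the given mean/var directly.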

batchnorm1d_configs_short = cudnn_benchmark_configs(
    op_bench.config_list(
        attr_names=["N", "C"],
        attrs=[
            [3136, 256],
        ],
        cross_product_configs={
            "device": ["cpu", "cuda"],
            "training": [True, False],
        },
        tags=["short"],
    )
)

batchnorm1d_configs_long = cudnn_benchmark_configs(
    op_bench.cross_product_configs(
        N=[2, 128],
        C=[8192, 2048],
        device=["cpu", "cuda"],
        training=[True, False],
        tags=["long"],
    )
)

class BatchNorm1dBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, C, device, training, cudnn):
        self.inputs = {
            "input_one": torch.rand(N, C, device=device, requires_grad=self.auto_set()),
            "mean": torch.rand(C, device=device),
            "var": torch.rand(C, device=device),
            "weight": torch.rand(C, device=device),
            "bias": torch.rand(C, device=device),
            "training": training,
            "cudnn": cudnn,
        }
        self.set_module_name("batchnorm")

    def forward(self, input_one, mean, var, weight, bias, training, cudnn):
        with torch.backends.cudnn.flags(enabled=cudnn):
            return F.batch_norm(input_one, mean, var, weight, bias, training)


op_bench.generate_pt_test(
    batchnorm1d_configs_short + batchnorm1d_configs_long, BatchNorm1dBenchmark
)
op_bench.generate_pt_gradient_test(
    batchnorm1d_configs_short + batchnorm1d_configs_long, BatchNorm1dBenchmark
)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
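
# Example invocation (assuming this file is the standard
# benchmarks/operator_benchmark/pt/batchnorm_test.py and the runner's usual
# --tag_filter flag; see the operator_benchmark README):
#   python -m pt.batchnorm_test --tag_filter short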