mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 05:34:18 +08:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/28837 The JIT code used in op bench is not compatible with the latest JIT code path. This diff aims to resolve that issue. Test Plan: ```buck run mode/opt //caffe2/benchmarks/operator_benchmark/pt:add_test -- --use_jit Building: finished in 02:29.8 min (100%) 7055/7055 jobs, 1 updated Total time: 02:30.3 min # ---------------------------------------- # PyTorch/Caffe2 Operator Micro-benchmarks # ---------------------------------------- # Tag : short # Benchmarking PyTorch: add # Mode: JIT # Name: add_M64_N64_K64_cpu # Input: M: 64, N: 64, K: 64, device: cpu Forward Execution Time (us) : 118.052``` Reviewed By: hl475 Differential Revision: D18197057 fbshipit-source-id: 92edae8a48abc4115a558a91ba46cc9c3edb2eb8
185 lines
6.7 KiB
Python
185 lines
6.7 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
import time
|
|
import json
|
|
import benchmark_core
|
|
import torch
|
|
import cpp_extension # noqa
|
|
|
|
|
|
"""PyTorch performance microbenchmarks.
|
|
|
|
This module contains PyTorch-specific functionalities for performance
|
|
microbenchmarks.
|
|
"""
|
|
|
|
class TorchBenchmarkBase(object):
    """ This is a base class used to create Pytorch operator benchmark.
        module_name is the name of the operator being benchmarked.
        test_name is the name (it's created by concatenating all the
        inputs) of a specific test
    """

    def __init__(self):
        self.user_given_name = None
        self._jit_forward = None
        self._pass_count = 0
        self._num_inputs_require_grads = 0
        # auto_set() reads both attributes below. Give them defaults so that
        # calling auto_set() on a fresh instance does not raise
        # AttributeError. NOTE(review): presumably the benchmark driver
        # overwrites them (via _set_backward_test and by resetting the
        # counter between init passes) -- that code is not in this file.
        self._is_backward = False
        self._auto_set_counter = 0

    def _set_backward_test(self, is_backward):
        """ Record whether this benchmark instance is used for a backward test.
        """
        self._is_backward = is_backward

    def auto_set(self):
        """ This is used to automatically set the require_grad for the backward path.
            It is implemented based on two counters. One counter to save the number of
            times init has been called. The other counter to save the number of times
            this function itself has been called. In the very first time init is called,
            this function counts how many inputs require gradient. In each of the
            following init calls, this function will return only one true value.
            Here is an example:
                ...
                self.v1 = torch.rand(M, N, K, requires_grad=self.auto_set())
                self.v2 = torch.rand(M, N, K, requires_grad=self.auto_set())
                ...

            Returns:
                False when the instance is not a backward test. Otherwise True
                on every call while _pass_count is 0, and afterwards True only
                on the call whose running index equals _pass_count.
        """
        if not self._is_backward:
            return False

        if self._pass_count == 0:
            # First pass: every input asks for a gradient and is counted.
            self._num_inputs_require_grads += 1
            return True

        # Later passes: only the call whose index matches the pass number
        # gets requires_grad=True.
        self._auto_set_counter += 1
        return self._pass_count == self._auto_set_counter

    def forward(self):
        """ Placeholder for the benchmarked computation; does nothing here.
        """
        pass

    def _wrap_forward(self, foo):
        """ The function passed to JIT trace must have at least one argument,
            this function is to wrap the forward method to meet that requirement.
            _consume op is used to avoid the dead-code-elimination optimization
            in JIT.
        """
        # `foo` is intentionally ignored; it only exists to satisfy the tracer.
        return torch.ops.operator_benchmark._consume(self.forward())

    def _generate_jit_forward_graph(self):
        """ generate a graph for the forward function via tracing

            Returns:
                A scripted function taking (iters, place_holder) that calls the
                traced forward wrapper `iters` times and returns the last result.
        """

        func = torch.jit.trace(self._wrap_forward, torch.rand(1))
        # Unused local kept as in the original (flagged noqa there as well).
        place_holder = torch.rand(1)  # noqa

        @torch.jit.script
        def _jit_forward_graph(iters, place_holder):
            # type: (int, Tensor)
            result = torch.jit.annotate(torch.Tensor, place_holder)
            for _ in range(iters):
                result = func(place_holder)
            return result
        return _jit_forward_graph

    def module_name(self):
        """ this is used to label the operator being benchmarked

            Returns the name given through set_module_name when it is set,
            otherwise the class name.
        """
        if self.user_given_name:
            return self.user_given_name
        return self.__class__.__name__

    def set_module_name(self, name):
        """ Override the label returned by module_name.
        """
        self.user_given_name = name

    def test_name(self, **kargs):
        """ this is a globally unique name which can be used to
            label a specific test

            Each keyword argument contributes "<key><value>" to the name
            (booleans are rendered as 0/1), the pieces are joined with '_'
            after the module name, and all spaces are removed.
        """

        # This is a list of attributes which will not be included
        # in the test name.
        skip_key_list = ['device']

        test_name_str = []
        for key, value in kargs.items():
            prefix = '' if key in skip_key_list else key
            if isinstance(value, bool):
                # Render True/False as 1/0 to keep the name compact.
                value = int(value)
            test_name_str.append(prefix + str(value))
        name = (self.module_name() + '_' +
                '_'.join(test_name_str)).replace(" ", "")
        return name
|
|
|
|
|
|
class PyTorchOperatorTestCase(object):
    """ This class includes all the information needed to benchmark an operator.
        op_bench: it's a user-defined class (child of TorchBenchmarkBase)
        which includes input and operator, .etc
        test_config: a namedtuple includes test_name, input_shape, tag, run_backward.
        When run_backward is false, the run_forward method will be executed,
        When run_backward is true, run_forward and _output_mean will be
        executed to generate output. Then, run_backward will be executed.
    """
    def __init__(self, op_bench, test_config):
        self.test_config = test_config
        self.op_bench = op_bench
        # Dummy tensor argument handed to the JIT forward graph.
        self.place_holder_tensor = torch.ones(1)
        self.framework = "PyTorch"
        # Per-iteration latencies in milliseconds, appended by run_forward
        # when print_per_iter is true.
        self.time_series = []

    def run_jit_forward(self, num_runs, print_per_iter=False):
        """ Run the forward path of an op with JIT mode

            The JIT graph is generated on first use and cached on op_bench.
            print_per_iter is accepted but not used by this method.
        """
        if self.op_bench._jit_forward is None:
            self.op_bench._jit_forward = self.op_bench._generate_jit_forward_graph()
        self.op_bench._jit_forward(num_runs, self.place_holder_tensor)

    def _print_per_iter(self):
        """ Print the last (at most) 50 recorded latencies, newest first,
            one "PyTorchObserver <json>" line per value.
        """
        # print last 50 values
        total = len(self.time_series)
        length = min(total, 50)
        for i in range(length):
            print("PyTorchObserver " + json.dumps(
                {
                    "type": self.test_config.test_name,
                    "metric": "latency",
                    "unit": "ms",
                    # Index from the end of the series so that, when more
                    # than 50 samples exist, the most recent ones are shown
                    # rather than the first 50.
                    "value": str(self.time_series[total - i - 1]),
                }
            ))

    def run_forward(self, num_runs, print_per_iter=False):
        """ Run the forward path of an op with eager mode

            When print_per_iter is true, each iteration is timed and its
            latency in milliseconds is appended to time_series. The result of
            the last forward call is stored in self.output.
        """
        if print_per_iter:
            for _ in range(num_runs):
                start_time = time.time()
                self.output = self.op_bench.forward()
                end_time = time.time()
                self.time_series.append((end_time - start_time) * 1e3)
        else:
            for _ in range(num_runs):
                self.output = self.op_bench.forward()

    def _output_mean(self):
        """ TODO (mingzhe): it is not necessary to sum up everything by myself,
            torch.autograd.backward do take a gradient tensor. By default, it
            is the same shape as your output tensor, with all 1s.
            Mathematically, it is the same as if the output is summed together.
            So we should be able to get rid of this method.
            dummy function for gradient calculation
        """
        self.mean = self.output.mean()

    def run_backward(self, num_runs, print_per_iter=False):
        """ Run the backward path of an op in many iterations

            Requires self.mean to have been set by _output_mean beforehand.
            print_per_iter is accepted but not used by this method.
        """
        # TODO: can we use JIT here to reduce python overhead?
        for _ in range(num_runs):
            # retain_graph=True so backward can be called repeatedly on the
            # same self.mean.
            self.mean.backward(retain_graph=True)
|
|
|
|
|
|
def register_pytorch_op_test_case(op_bench, test_config):
    """ Wrap op_bench and test_config in a PyTorchOperatorTestCase and pass
        it to benchmark_core._register_test.
    """
    benchmark_core._register_test(
        PyTorchOperatorTestCase(op_bench, test_config))
|