From 7c825bad10fa81e330f8ecd4e8cac5aa17143b21 Mon Sep 17 00:00:00 2001
From: Hameer Abbasi
Date: Fri, 10 Apr 2020 09:03:12 -0700
Subject: [PATCH] [RELAND] Add __torch_function__ benchmarks (#36138)

Summary:
Re-land of https://github.com/pytorch/pytorch/issues/35530 and https://github.com/pytorch/pytorch/issues/34645

Pull Request resolved: https://github.com/pytorch/pytorch/pull/36138

Differential Revision: D20893770

Pulled By: ezyang

fbshipit-source-id: 75ab688a086f5fb87412a853df5246c0c39704ca
---
 .jenkins/pytorch/test.sh                     | 14 ++++
 benchmarks/overrides_benchmark/README.md     | 40 ++++++++++++
 benchmarks/overrides_benchmark/bench.py      | 67 ++++++++++++++++++++
 benchmarks/overrides_benchmark/common.py     | 31 +++++++++
 benchmarks/overrides_benchmark/pyspybench.py | 28 ++++++++
 torch/_overrides.py                          |  3 +
 torch/csrc/utils/python_arg_parser.cpp       |  4 ++
 7 files changed, 187 insertions(+)
 create mode 100644 benchmarks/overrides_benchmark/README.md
 create mode 100644 benchmarks/overrides_benchmark/bench.py
 create mode 100644 benchmarks/overrides_benchmark/common.py
 create mode 100644 benchmarks/overrides_benchmark/pyspybench.py

diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 145041e006c6..e9fce31dc42c 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -218,6 +218,18 @@ test_custom_script_ops() {
   fi
 }

+test_torch_function_benchmark() {
+  echo "Testing __torch_function__ benchmarks"
+  pushd benchmarks/overrides_benchmark
+  python bench.py -n 1 -m 2
+  python pyspybench.py Tensor -n 1
+  python pyspybench.py SubTensor -n 1
+  python pyspybench.py WithTorchFunction -n 1
+  python pyspybench.py SubWithTorchFunction -n 1
+  popd
+  assert_git_not_dirty
+}
+
 test_xla() {
   export XLA_USE_XRT=1 XRT_DEVICE_MAP="CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0"
   # Issue #30717: randomize the port of XLA/gRPC workers is listening on to reduce flaky tests.
@@ -286,6 +298,7 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-test2 || "${JOB_BASE_NAME}" == *-test2 ]]; t
   test_aten
   test_libtorch
   test_custom_script_ops
+  test_torch_function_benchmark
 elif [[ "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
   test_bazel
 else
@@ -295,4 +308,5 @@ else
   test_aten
   test_libtorch
   test_custom_script_ops
+  test_torch_function_benchmark
 fi
diff --git a/benchmarks/overrides_benchmark/README.md b/benchmarks/overrides_benchmark/README.md
new file mode 100644
index 000000000000..03af5c399d5f
--- /dev/null
+++ b/benchmarks/overrides_benchmark/README.md
@@ -0,0 +1,40 @@
# `__torch_function__` micro-benchmarks

This benchmark suite provides a systematic way to measure the overhead of `__torch_function__` dispatch.

## Getting started
### Initial Setup
Install `py-spy` by running:

```bash
pip install py-spy
```

Note that more extensive documentation on using `py-spy` is available in `CONTRIBUTING.md`.

### Running the benchmark
Run one of the following commands in the terminal, with the working directory being `${PYTORCH_CLONE_DIR}/benchmarks/overrides_benchmark`:

```bash
# Benchmark all the cases
python bench.py

# Record a flame graph for each case.
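# (Note: the --native flag below asks py-spy to also sample C/C++ stack frames,
# which is where most of the __torch_function__ dispatch overhead lives. If
# --native is not supported on your platform, drop the flag; the resulting
# flame graph will then only show Python-level frames.)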
py-spy record -o tensor.svg --native -- python pyspybench.py Tensor
py-spy record -o subtensor.svg --native -- python pyspybench.py SubTensor
py-spy record -o overridden.svg --native -- python pyspybench.py WithTorchFunction
py-spy record -o suboverridden.svg --native -- python pyspybench.py SubWithTorchFunction
```

Here is a brief overview of what the results should look like if the benchmark is run correctly:

* Overhead for `torch` functions when run on `torch.Tensor` objects is on the order of 2 μs.
* `__torch_function__` should add zero overhead for `torch.Tensor` inputs, a small overhead for subclasses of `torch.Tensor`, and a couple of microseconds for `Tensor`-like objects that define `__torch_function__`.
* Changes to the dispatch mechanism may shift these numbers by amounts on the order of 100 ns; such differences are hard to distinguish from noise, but they matter.

## Reporting benchmark results
When modifying any of the machinery around `__torch_function__`, run the benchmark for both the feature branch and the point it diverges from `master`. For each of these:

* Run `bench.py`, and include the output in your result.
* For each case where `bench.py` shows a regression, run the commands described above, prefixing the output SVG filename (the argument to the `-o` switch) with `base-` or `branch-` depending on the commit you are benchmarking.
* Open each SVG in a browser, take a screenshot, and include it in your result. Also attach a ZIP file containing all of the generated SVGs.
diff --git a/benchmarks/overrides_benchmark/bench.py b/benchmarks/overrides_benchmark/bench.py
new file mode 100644
index 000000000000..5ebf967d6c77
--- /dev/null
+++ b/benchmarks/overrides_benchmark/bench.py
@@ -0,0 +1,67 @@
import torch
import time
import argparse

from common import SubTensor, WithTorchFunction, SubWithTorchFunction

NUM_REPEATS = 1000
NUM_REPEAT_OF_REPEATS = 1000


def bench(t1, t2):
    bench_times = []
    for _ in range(NUM_REPEAT_OF_REPEATS):
        time_start = time.time()
        for _ in range(NUM_REPEATS):
            torch.add(t1, t2)
        bench_times.append(time.time() - time_start)

    # Convert the fastest measurement into a per-call time; using the minimum
    # rather than the mean reduces the influence of noise. Divide by
    # NUM_REPEATS (not a hard-coded 1000) so that --nreps is honored.
    bench_time = float(torch.min(torch.Tensor(bench_times))) / NUM_REPEATS
    bench_std = float(torch.std(torch.Tensor(bench_times))) / NUM_REPEATS

    return bench_time, bench_std


def main():
    global NUM_REPEATS
    global NUM_REPEAT_OF_REPEATS

    parser = argparse.ArgumentParser(
        description="Run the __torch_function__ benchmarks."
+ ) + parser.add_argument( + "--nreps", + "-n", + type=int, + default=NUM_REPEATS, + help="The number of repeats for one measurement.", + ) + parser.add_argument( + "--nrepreps", + "-m", + type=int, + default=NUM_REPEAT_OF_REPEATS, + help="The number of measurements.", + ) + args = parser.parse_args() + + NUM_REPEATS = args.nreps + NUM_REPEAT_OF_REPEATS = args.nrepreps + + types = torch.Tensor, SubTensor, WithTorchFunction, SubWithTorchFunction + + for t in types: + tensor_1 = t(1) + tensor_2 = t(2) + + bench_min, bench_std = bench(tensor_1, tensor_2) + print( + "Type {0} had a minimum time of {1} us" + " and a standard deviation of {2} us.".format( + t.__name__, (10 ** 6 * bench_min), (10 ** 6) * bench_std + ) + ) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/overrides_benchmark/common.py b/benchmarks/overrides_benchmark/common.py new file mode 100644 index 000000000000..9651c0496a91 --- /dev/null +++ b/benchmarks/overrides_benchmark/common.py @@ -0,0 +1,31 @@ +import torch + +NUM_REPEATS = 1000 +NUM_REPEAT_OF_REPEATS = 1000 + + +class SubTensor(torch.Tensor): + pass + + +class WithTorchFunction: + def __init__(self, data, requires_grad=False): + if isinstance(data, torch.Tensor): + self._tensor = data + return + + self._tensor = torch.Tensor(data, requires_grad) + + def __torch_function__(self, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + + return WithTorchFunction(args[0]._tensor + args[1]._tensor) + + +class SubWithTorchFunction(torch.Tensor): + def __torch_function__(self, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + + return args[0] + args[1] diff --git a/benchmarks/overrides_benchmark/pyspybench.py b/benchmarks/overrides_benchmark/pyspybench.py new file mode 100644 index 000000000000..d298f74c4381 --- /dev/null +++ b/benchmarks/overrides_benchmark/pyspybench.py @@ -0,0 +1,28 @@ +import torch +import argparse +from common import SubTensor, WithTorchFunction, SubWithTorchFunction # noqa: F401 + +Tensor = torch.Tensor + +NUM_REPEATS = 1000000 + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Run the torch.add for a given class a given number of times." + ) + parser.add_argument( + "tensor_class", metavar="TensorClass", type=str, help="The class to benchmark." + ) + parser.add_argument( + "--nreps", "-n", type=int, default=NUM_REPEATS, help="The number of repeats." + ) + args = parser.parse_args() + + TensorClass = globals()[args.tensor_class] + NUM_REPEATS = args.nreps + + t1 = TensorClass(1) + t2 = TensorClass(2) + + for _ in range(NUM_REPEATS): + torch.add(t1, t2) diff --git a/torch/_overrides.py b/torch/_overrides.py index 808478ffcaac..4a7ef521e450 100644 --- a/torch/_overrides.py +++ b/torch/_overrides.py @@ -13,6 +13,9 @@ https://github.com/pytorch/pytorch/issues/24015 and https://www.numpy.org/neps/nep-0018-array-function-protocol.html ) +If changing this file in a way that can affect ``__torch_function__`` overhead, +please report the benchmarks in ``benchmarks/overrides_benchmark``. See the +instructions in the ``README.md`` in that directory. """ import __future__ diff --git a/torch/csrc/utils/python_arg_parser.cpp b/torch/csrc/utils/python_arg_parser.cpp index 8a804ba71819..4588128ca610 100644 --- a/torch/csrc/utils/python_arg_parser.cpp +++ b/torch/csrc/utils/python_arg_parser.cpp @@ -215,6 +215,10 @@ auto handle_torch_function(PythonArgs &r, PyObject* args, PyObject* kwargs, PyOb * precedence. 
* * 'obj' is an object to check for a __torch_function__ implementation + * + * If changing this file in a way that can affect the __torch_function__ + * overhead, please report the benchmarks in 'benchmarks/overrides_benchmark'. + * See the instructions in the 'README.md' in that directory. * */
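As a companion to the README's "Reporting benchmark results" section, here is a minimal sketch of the base-versus-branch workflow it describes. It is illustrative only and not part of the patch: the branch name `my-feature`, the use of `git merge-base` to find the divergence point, and the `*-bench.txt` file names are all assumptions.

```bash
BRANCH=my-feature   # hypothetical feature branch name; substitute your own
cd "${PYTORCH_CLONE_DIR}/benchmarks/overrides_benchmark"

# 1. Benchmark the point where the branch diverges from master.
git checkout "$(git merge-base master "$BRANCH")"
python bench.py | tee base-bench.txt
py-spy record -o base-suboverridden.svg --native -- python pyspybench.py SubWithTorchFunction

# 2. Benchmark the feature branch itself.
git checkout "$BRANCH"
python bench.py | tee branch-bench.txt
py-spy record -o branch-suboverridden.svg --native -- python pyspybench.py SubWithTorchFunction

# 3. Attach both bench.py outputs, screenshots of each SVG, and a ZIP of all
#    generated SVGs to the pull request, as the README asks.
```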