Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-21 05:34:18 +08:00
This pull request enhances the PyTorch operator benchmarking suite by introducing support for benchmarking with `torch.compile` mode, in addition to the existing Eager and JIT modes. It also adds peak memory measurement for the forward/backward pass, improves the JSON output format used by the dashboard for reporting, and introduces new CLI options:

- Added a `--use-compile` CLI argument and corresponding logic to run benchmarks using `torch.compile`, including mutual exclusivity with `--use-jit`
- Added a `--benchmark-name` argument for customizing the benchmark name in the output
- Updated the default value of `--output-json-for-dashboard` to `benchmark-results.json` for a more predictable output file name

Sample command to run a single operator: `python -m pt.mm_test --use-compile`

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161394
Approved by: https://github.com/jbschlosser
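The mutual exclusivity between `--use-compile` and `--use-jit` mentioned above is not visible in the parser definitions in the file below, which only declares the two flags. As a minimal illustrative sketch (an assumption about what such a check could look like, not the PR's actual implementation):

```python
# Hypothetical sketch only: the PR states that --use-compile and --use-jit
# are mutually exclusive, but the actual check is not part of this file.
def validate_run_mode(args):
    if args.use_compile and args.use_jit:
        raise ValueError(
            "--use-compile and --use-jit are mutually exclusive; choose one run mode"
        )
```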
212 lines
5.4 KiB
Python
import argparse

import benchmark_core
import benchmark_utils

import torch


"""Performance microbenchmark's main binary.

This is the main function for running performance microbenchmark tests.
It also registers existing benchmark tests via Python module imports.
"""
parser = argparse.ArgumentParser(
    description="Run microbenchmarks.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    conflict_handler="resolve",
)


def parse_args():
    parser.add_argument(
        "--tag-filter",
        "--tag_filter",
        help="tag_filter can be used to run the shapes which match the tag. (all is used to run all the shapes)",
        default="short",
    )

    # This option is used to filter test cases to run.
    parser.add_argument(
        "--operators",
        help="Filter tests based on comma-delimited list of operators to test",
        default=None,
    )

    parser.add_argument(
        "--operator-range",
        "--operator_range",
        help="Filter tests based on operator_range (e.g. a-c or b,c-d)",
        default=None,
    )

    parser.add_argument(
        "--test-name",
        "--test_name",
        help="Run tests that have the provided test_name",
        default=None,
    )

    parser.add_argument(
        "--list-ops",
        "--list_ops",
        help="List operators without running them",
        action="store_true",
    )

    parser.add_argument(
        "--output-json",
        "--output_json",
        help="JSON file path to write the results to",
        default=None,
    )

    parser.add_argument(
        "--benchmark-name",
        "--benchmark_name",
        help="Name of the benchmark to store results to",
        default="PyTorch operator benchmark",
    )

    parser.add_argument(
        "--list-tests",
        "--list_tests",
        help="List all test cases without running them",
        action="store_true",
    )

    parser.add_argument(
        "--iterations",
        help="Repeat each operator for the number of iterations",
        type=int,
    )

    parser.add_argument(
        "--num-runs",
        "--num_runs",
        help="Run each test for num_runs. Each run executes an operator for number of <--iterations>",
        type=int,
        default=1,
    )

    parser.add_argument(
        "--min-time-per-test",
        "--min_time_per_test",
        help="Set the minimum time (unit: seconds) to run each test",
        type=int,
        default=0,
    )

    parser.add_argument(
        "--warmup-iterations",
        "--warmup_iterations",
        help="Number of iterations to ignore before measuring performance",
        default=100,
        type=int,
    )

    parser.add_argument(
        "--omp-num-threads",
        "--omp_num_threads",
        help="Number of OpenMP threads used in PyTorch runtime",
        default=None,
        type=int,
    )

    parser.add_argument(
        "--mkl-num-threads",
        "--mkl_num_threads",
        help="Number of MKL threads used in PyTorch runtime",
        default=None,
        type=int,
    )

    parser.add_argument(
        "--report-aibench",
        "--report_aibench",
        type=benchmark_utils.str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Print result when running on AIBench",
    )

    parser.add_argument(
        "--use-jit",
        "--use_jit",
        type=benchmark_utils.str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Run operators with PyTorch JIT mode",
    )

    # New option to run operators under torch.compile (per the PR description,
    # mutually exclusive with --use-jit).
    parser.add_argument(
        "--use-compile",
        "--use_compile",
        type=benchmark_utils.str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Run operators with PyTorch Compile mode",
    )

    parser.add_argument(
        "--forward-only",
        "--forward_only",
        type=benchmark_utils.str2bool,
        nargs="?",
        const=True,
        default=False,
        help="Only run the forward path of operators",
    )

    parser.add_argument(
        "--device",
        help="Run tests on the provided architecture (cpu, cuda)",
        default="None",
    )

    parser.add_argument(
        "--output-csv",
        "--output_csv",
        help="CSV file path to store the results",
        default="benchmark_logs",
    )

    parser.add_argument(
        "--output-json-for-dashboard",
        "--output_json_for_dashboard",
        help="Save results in JSON format for display on the OSS dashboard",
        default="benchmark-results.json",
    )

    args, _ = parser.parse_known_args()

    if args.omp_num_threads:
        # benchmark_utils.set_omp_threads sets the env variable OMP_NUM_THREADS,
        # which doesn't have any impact as C2 init logic has already been called
        # before setting the env var.
        #
        # In general, OMP_NUM_THREADS (and other OMP env variables) need to be set
        # before the program is started.
        # From Chapter 4 of the OpenMP standard: https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf
        # "Modifications to the environment variables after the program has started,
        # even if modified by the program itself, are ignored by the OpenMP implementation"
        benchmark_utils.set_omp_threads(args.omp_num_threads)
        torch.set_num_threads(args.omp_num_threads)
    if args.mkl_num_threads:
        benchmark_utils.set_mkl_threads(args.mkl_num_threads)

    return args


def main():
    args = parse_args()
    benchmark_core.BenchmarkRunner(args).run()


if __name__ == "__main__":
    main()
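A usage note on the boolean mode flags defined above (`--use-jit`, `--use-compile`, `--forward-only`, `--report-aibench`): because they combine `type=benchmark_utils.str2bool` with `nargs="?"` and `const=True`, passing the bare flag turns the mode on, while an explicit value such as `false` turns it off. Below is a minimal, self-contained sketch of the same argparse pattern; the local `str2bool` is a stand-in and may not match the exact behavior of `benchmark_utils.str2bool`:

```python
import argparse


def str2bool(v):
    # Stand-in for benchmark_utils.str2bool (an assumption; the real helper may differ).
    if isinstance(v, bool):
        return v
    if v.lower() in ("yes", "true", "t", "y", "1"):
        return True
    if v.lower() in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")


parser = argparse.ArgumentParser()
parser.add_argument(
    "--use-compile", type=str2bool, nargs="?", const=True, default=False
)

print(parser.parse_args([]).use_compile)                          # False (default)
print(parser.parse_args(["--use-compile"]).use_compile)           # True  (bare flag -> const)
print(parser.parse_args(["--use-compile", "false"]).use_compile)  # False (explicit value)
```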