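"""TorchCache benchmark driver.

For each model in the selected suite (torchbench or huggingface), this script
runs the dynamo benchmark runner with the inductor backend twice: once with a
fresh compile cache ("cold") and once reusing that cache ("warm"). The
measured compilation latencies and the resulting cache speedup are written out
as a list of JSON records.

Illustrative invocation (a sketch, not a canonical command line; it assumes
this file sits next to torchbench.py and huggingface.py, as the assert in
_run_torchbench_model requires):

    python <path to this script> --benchmark torchbench --mode training \
        --device cuda --repeat 3 --output results.json
"""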
import argparse
import dataclasses
import json
import logging
import os
import subprocess
import sys
import tempfile
from typing import Callable

from torch._inductor.utils import fresh_cache


logger: logging.Logger = logging.getLogger(__name__)

TIMEOUT: int = 2000

# Keep in sync with .ci/pytorch/test.sh
TORCHBENCH_MODELS: list[str] = [
    "nanogpt",
    "BERT_pytorch",
    "resnet50",
    "moco",
    "llama",
    "hf_T5",
]
HUGGINGFACE_MODELS: list[str] = [
    "AllenaiLongformerBase",
    "BertForMaskedLM",
    "GPT2ForSequenceClassification",
]


@dataclasses.dataclass
class RunResult:
    model: str
    mode: str  # inference or training
    benchmark: str
    dynamic: bool
    device: str  # cuda or cpu
    cold_compile_s: list[float]
    warm_compile_s: list[float]
    speedup_pct: float
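

# get_compile_time() below reads the CSV that the benchmark runner writes via
# --output=<file>. A minimal sketch of the expected layout (column names other
# than "compilation_latency" are illustrative assumptions, not guaranteed by
# this script):
#
#   dev,name,batch_size,speedup,compilation_latency
#   cuda,nanogpt,8,1.05,42.3
#
# Only the header row and the first data row are consulted; the temp file is
# opened in binary mode, hence the utf-8 decode.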
def get_compile_time(file: tempfile._TemporaryFileWrapper) -> float:
    lines = file.readlines()
    # Decode from byte string, remove new lines, parse csv
    lines = [line.decode("utf-8").strip().split(",") for line in lines]
    compilation_time_idx = lines[0].index("compilation_latency")
    compilation_time = lines[1][compilation_time_idx]
    return float(compilation_time)
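

# _run_torchbench_from_args() runs the same benchmark command twice per repeat:
# a cold run under fresh_cache(), which is expected to point the inductor
# compile caches at a fresh temporary location for the duration of the block,
# and a warm run that reuses whatever the cold run cached. The environment is
# snapshotted inside the fresh_cache() block so both subprocesses see the same
# cache directory.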
def _run_torchbench_from_args(
    cmd_args: argparse.Namespace,
    model: str,
    args: list[str],
) -> tuple[list[float], list[float]]:
    cold_compile_time: list[float] = []
    warm_compile_time: list[float] = []

    for _ in range(cmd_args.repeat):
        with fresh_cache():
            env = os.environ.copy()
            with tempfile.NamedTemporaryFile(suffix=".csv") as file:
                args.append("--output=" + file.name)
                logger.info(f"Performing cold-start run for {model}")  # noqa: G004
                subprocess.check_call(args, timeout=TIMEOUT, env=env)
                cold_compile_time.append(get_compile_time(file))

            args.pop()
            with tempfile.NamedTemporaryFile(suffix=".csv") as file:
                args.append("--output=" + file.name)
                logger.info(f"Performing warm-start run for {model}")  # noqa: G004
                subprocess.check_call(args, timeout=TIMEOUT, env=env)
                warm_compile_time.append(get_compile_time(file))

    return cold_compile_time, warm_compile_time


MODE_ARGS_DICT = {
    "inference": ["--inference", "--bfloat16"],
    "training": ["--training", "--amp"],
}

BENCHMARK_FILE = {
    "torchbench": "torchbench.py",
    "huggingface": "huggingface.py",
}


def _run_torchbench_model(
    cmd_args: argparse.Namespace,
    results: list[RunResult],
    model: str,
) -> None:
    cur_file = os.path.abspath(__file__)
    torchbench_file = os.path.join(
        os.path.dirname(cur_file), BENCHMARK_FILE[cmd_args.benchmark]
    )
    assert os.path.exists(torchbench_file), (
        f"Torchbench does not exist at {torchbench_file}"
    )

    dynamic = cmd_args.dynamic
    dynamic_args = ["--dynamic-shapes", "--dynamic-batch-only"] if dynamic else []

    args = (
        [
            sys.executable,
            torchbench_file,
            f"--only={model}",
            "--repeat=1",
            "--performance",
            "--backend=inductor",
            f"--device={cmd_args.device}",
        ]
        + MODE_ARGS_DICT[cmd_args.mode]
        + dynamic_args
    )

    logger.info(f"Command: {args}")  # noqa: G004
    try:
        cold_compile_t, warm_compile_t = _run_torchbench_from_args(
            cmd_args, model, args
        )
        speedup_pct = (1 - (sum(warm_compile_t) / sum(cold_compile_t))) * 100
        results.append(
            RunResult(
                model=model,
                mode=cmd_args.mode,
                benchmark=cmd_args.benchmark,
                dynamic=dynamic,
                device=cmd_args.device,
                cold_compile_s=cold_compile_t,
                warm_compile_s=warm_compile_t,
                speedup_pct=speedup_pct,
            )
        )
    except Exception:
        logger.info("fail", exc_info=True)
        return None
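

# _write_results_to_json() below emits a flat list of records, one per metric
# per model. Illustrative record (values are made up; the structure mirrors the
# dict literal in the function):
#
#   {
#     "benchmark": {"name": "TorchCache Benchmark", "mode": "training",
#                   "extra_info": {"is_dynamic": false, "device": "cuda"}},
#     "model": {"name": "nanogpt", "backend": "inductor",
#               "origins": ["torchbench"]},
#     "metric": {"name": "Cold compile time (s)", "type": "OSS model",
#                "benchmark_values": [95.2]},
#   }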
def _write_results_to_json(
    cmd_args: argparse.Namespace,
    results: list[RunResult],
) -> None:
    if len(results) == 0:
        # do not write empty results
        return

    records = []
    for result in results:
        for metric_name, value in [
            ("Cold compile time (s)", result.cold_compile_s),
            ("Warm compile time (s)", result.warm_compile_s),
            ("Speedup (%)", [result.speedup_pct]),
        ]:
            records.append(
                {
                    "benchmark": {
                        "name": "TorchCache Benchmark",
                        "mode": result.mode,
                        "extra_info": {
                            "is_dynamic": result.dynamic,
                            "device": result.device,
                        },
                    },
                    "model": {
                        "name": result.model,
                        "backend": "inductor",
                        "origins": [result.benchmark],
                    },
                    "metric": {
                        "name": metric_name,
                        "type": "OSS model",
                        "benchmark_values": value,
                    },
                }
            )
    with open(cmd_args.output, "w") as f:
        json.dump(records, f)
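

# Note: parse_known_args() is used below, so unrecognized flags are silently
# ignored rather than treated as errors (presumably so outer harnesses can pass
# extra arguments through without breaking this script).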
def parse_cmd_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Run a TorchCache benchmark.")
    parser.add_argument(
        "-m",
        "--model",
        help="Name of the model to run",
    )
    parser.add_argument(
        "--dynamic",
        action="store_true",
        help="Whether to run with dynamic enabled",
    )
    parser.add_argument(
        "--benchmark",
        choices=("torchbench", "huggingface"),
        required=True,
        help="Name of benchmark suite to run",
    )
    parser.add_argument(
        "--mode",
        choices=("inference", "training"),
        default="training",
    )
    parser.add_argument(
        "--device",
        default="cuda",
        choices=("cuda", "cpu"),
    )
    parser.add_argument(
        "--output",
        required=True,
        help="The output filename (json)",
    )
    parser.add_argument(
        "--repeat",
        type=int,
        default=1,
        choices=range(1, 10),
        help="Number of times to repeat the compilation (reduce noise)",
    )
    args, _ = parser.parse_known_args()
    return args


Dispatch_fn_t = Callable[[argparse.Namespace, list[RunResult], str], None]


def main() -> None:
    cmd_args = parse_cmd_args()

    dispatcher: dict[str, tuple[Dispatch_fn_t, list[str]]] = {
        "torchbench": (_run_torchbench_model, TORCHBENCH_MODELS),
        "huggingface": (_run_torchbench_model, HUGGINGFACE_MODELS),
    }
    fn, models = dispatcher[cmd_args.benchmark]
    if cmd_args.model is not None:
        models = [cmd_args.model]

    results: list[RunResult] = []
    for model in models:
        fn(cmd_args, results, model)

    _write_results_to_json(cmd_args, results)


if __name__ == "__main__":
    main()