Compare commits

...

1 Commits

Author SHA1 Message Date
ae592dd5f2 add initial benchmark 2025-06-25 22:58:27 -07:00
5 changed files with 456 additions and 0 deletions

View File

@ -0,0 +1,131 @@
import argparse
import csv
import dataclasses
import gc
import itertools
import json
import os
from common import Experiment
from generate import test_configs
from model_zoo import models
DEFAULT_OUTPUT_FILE = "gpt_fast_benchmark.csv"
def output_csv(output_file, headers, row):
    """Append one result row to ``output_file``, rewriting the whole CSV.

    If the file already exists its rows are read back first. The existing
    header row is kept unless ``headers`` is longer, in which case ``headers``
    replaces it. Float values in ``row`` are written with six decimal places,
    and every row shorter than the header is right-padded with ``"0"``.

    Args:
        output_file: Path of the CSV file to create or extend.
        headers: Column names for the header row.
        row: Values of the new record, in header order.
    """
    if os.path.exists(output_file):
        with open(output_file) as fd:
            lines = list(csv.reader(fd)) or [[]]
        if headers and len(headers) > len(lines[0]):
            # if prior results failed the header might not be filled in yet
            lines[0] = headers
        else:
            headers = lines[0]
    else:
        lines = [headers]

    # A bare filename has an empty dirname and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    lines.append([(f"{x:.6f}" if isinstance(x, float) else x) for x in row])
    with open(output_file, "w") as fd:
        writer = csv.writer(fd, lineterminator="\n")
        for line in lines:
            writer.writerow(list(line) + ["0"] * (len(headers) - len(line)))
def output_json(output_file, headers, row):
    """
    Append one result as a single JSON line, so that it can be uploaded to the
    benchmark database and displayed on the OSS dashboard. The JSON format is
    defined at
    https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database

    The record is written to the path of ``output_file`` with its extension
    replaced by ``.json``; ``headers`` and ``row`` are paired by position.
    """
    fields = {headers[idx]: value for idx, value in enumerate(row)}

    benchmark_info = {
        "name": "PyTorch LLM benchmark",
        "mode": "inference",
        "dtype": fields["dtype"],
        "extra_info": {
            "device": fields["device"],
            "arch": fields["arch"],
        },
    }
    model_info = {
        "name": fields["name"],
        "origins": ["pytorch"],
    }
    metric_info = {
        "compilation_time": fields["compilation_time"],
        "tokens_per_second": [fields["tokens_per_second"]],
        "memory_bandwidth": fields["memory_bandwidth"],
    }
    payload = {
        "benchmark": benchmark_info,
        "model": model_info,
        "metric": metric_info,
    }

    json_path = os.path.splitext(output_file)[0] + ".json"
    # Append mode: each call adds exactly one line to the file.
    with open(json_path, "a") as sink:
        sink.write(json.dumps(payload) + "\n")
def main(args):
    """Run the selected benchmarks and save their results as CSV and JSON.

    Args:
        args: Parsed command-line namespace. Reads ``only`` (a single model
            name, or falsy to run every entry of ``models``), ``test_config``
            and ``device`` (falsy to sweep all configs / both devices) and
            ``output_file`` (CSV path; the JSON path is derived from it).

    Raises:
        ValueError: If ``args.only`` names a model that is not in ``models``.
    """
    if args.only:
        if args.only not in models:
            raise ValueError(f"Unknown model: {args.only}")
        experiments = [(args.only, models[args.only])]
    else:
        experiments = list(models.items())

    configs_to_test = [args.test_config] if args.test_config else list(test_configs)
    devices = [args.device] if args.device else ["cuda", "cpu"]

    results = []
    for model_name, benchmark_class in experiments:
        for test_config, device in itertools.product(configs_to_test, devices):
            print("Processing:", model_name, test_config, device)
            benchmark = benchmark_class(model_name, device, test_config)
            res = benchmark.run_inference()
            result_row = dataclasses.astuple(res)
            print("Results:", result_row)
            results.append(result_row)
            # Clean up the memory to avoid OOM
            del benchmark
            gc.collect()

    headers = [field.name for field in dataclasses.fields(Experiment)]
    for row in results:
        output_csv(args.output_file, headers, row)
        output_json(args.output_file, headers, row)

    print(f"Results saved to {args.output_file}")
    # The JSON path must come from args; there is no bare ``output_file`` name
    # in this scope.
    print(f"Results saved to {os.path.splitext(args.output_file)[0]}.json")
if __name__ == "__main__":
    # Command-line entry point: declare the options table, parse, and run.
    parser = argparse.ArgumentParser(description="Run experiments.")
    cli_options = (
        (
            "--output-file",
            {
                "default": DEFAULT_OUTPUT_FILE,
                "help": "Set the output CSV file to save the benchmark results",
            },
        ),
        ("--only", {"help": "Specify a model to run exclusively"}),
        (
            "--device",
            {"help": "Specify the device to use", "choices": ["cuda", "cpu"]},
        ),
        (
            "--test-config",
            {"help": "Specify the test config to use", "choices": test_configs},
        ),
    )
    # Registration order is kept so the generated --help output is unchanged.
    for flag, options in cli_options:
        parser.add_argument(flag, **options)
    args = parser.parse_args()
    main(args)

View File

@ -0,0 +1,58 @@
import itertools
import platform
from dataclasses import dataclass
from typing import Optional
import torch
torch.manual_seed(1234)
def get_arch_name() -> str:
    """Return the CUDA device name if CUDA is available, else the machine type."""
    if not torch.cuda.is_available():
        # This returns x86_64 or arm64 (for aarch64)
        return platform.machine()
    return torch.cuda.get_device_name()
@dataclass
class Experiment:
    """One benchmark result row.

    Field order defines the CSV column order, because results are serialised
    with ``dataclasses.astuple`` and headed with ``dataclasses.fields``.
    """

    name: str
    dtype: str
    device: str
    arch: str
    test_config: str
    # Timings and rates are computed from wall-clock differences, so they are
    # floats (a literal 0 is stored when no compilation time is recorded).
    compilation_time: float
    tokens_per_second: Optional[float] = None
    memory_bandwidth: Optional[float] = None
    real_time_factor: Optional[float] = None
# Number of generate() calls each benchmark's run_inference() loop performs.
N_ITER = 10
# Candidate sweep values. The text-generation benchmark currently reads only
# the first entry of the batch-size and max-new-token lists.
batch_size_combinations = [1, 4]
max_new_token_combinations = [16, 256]
# NOTE(review): only referenced from a commented-out generation kwarg in the
# text-generation benchmark, so this looks unused for now; confirm.
cache_implementation_combinations = ["hybrid", "static"]
def device_sync(device):
    """Wait for pending work on ``device`` before and after a timed region.

    Args:
        device: Device string. Anything containing ``"cuda"`` is passed to
            ``torch.cuda.synchronize``; anything containing ``"cpu"`` is a
            no-op. Other devices only print a notice and are not synchronised.
    """
    if "cuda" in device:
        torch.cuda.synchronize(device)
    elif "cpu" in device:
        # No explicit synchronisation is performed for CPU.
        pass
    else:
        print(f"device={device} is not yet supported")
# Only count activated parameters and buffers.
def _get_model_size(model):
model_size = 0
for name, child in model.named_children():
if not isinstance(child, torch.nn.Embedding):
model_size += sum(
p.numel() * p.dtype.itemsize
for p in itertools.chain(child.parameters(), child.buffers())
)
return model_size

View File

@ -0,0 +1,188 @@
import copy
import time
from contextlib import nullcontext
from common import (
_get_model_size,
batch_size_combinations,
device_sync,
Experiment,
get_arch_name,
max_new_token_combinations,
N_ITER,
)
from datasets import load_dataset
from prompts import FRANCE_ARTICLE
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
WhisperForConditionalGeneration,
WhisperProcessor,
)
import torch
class Benchmark:
    """Base class for a single (model, device, test config) benchmark.

    Subclasses implement ``get_model_and_inputs`` to populate ``self.model``,
    ``self.inputs`` and ``self.dtype``, and ``run_inference`` to time the model
    and return the result.
    """

    def __init__(self, model_name, device, test_config):
        self.device = device
        self.model_name = model_name
        self.test_config = test_config
        self.model = None
        self.inputs = None
        self.dtype = None
        self.get_model_and_inputs()  # Sets self.model, self.inputs, and self.dtype
        assert self.model is not None
        assert self.inputs is not None
        assert self.dtype is not None

    @property
    def stance(self):
        """Return a fresh context manager selecting the compiler stance.

        For the ``"eager"`` test config this is
        ``torch.compiler.set_stance("force_eager")``; otherwise it is a no-op
        ``nullcontext``. A new object is built on every access because
        ``set_stance`` can also be used as a plain function, i.e. it takes
        effect when constructed. Creating it once in ``__init__`` would switch
        the global stance before any ``with`` block is entered and leave it
        switched if inference never runs.
        """
        if self.test_config == "eager":
            return torch.compiler.set_stance("force_eager")
        return nullcontext()

    def get_model_and_inputs(self):
        """Load the model and inputs; must set model, inputs and dtype."""
        raise NotImplementedError("get_model_and_inputs() not implemented")

    def run_inference(self):
        """Run the timed inference loop and return the measured result."""
        raise NotImplementedError("run_inference() not implemented")
class ASRBenchmark(Benchmark):
    """Benchmark for speech-recognition models, reported as a real-time factor.

    Expects the subclass's ``get_model_and_inputs`` to also set
    ``self.processor``, and ``self.inputs`` to be indexable with each item
    exposing an ``"audio"`` entry holding ``"array"`` and ``"sampling_rate"``.
    """

    def run_inference(self):
        """Transcribe the first ``N_ITER`` samples and return the result.

        The real-time factor of one sample is the wall-clock generation time
        divided by the audio duration; the reported value is the mean over all
        iterations (including the first one).
        """
        torch.compiler.reset()
        first_iteration = 0
        total_real_time_factor = 0
        for i in range(N_ITER):
            sample = self.inputs[i]["audio"]
            features = self.processor(
                sample["array"],
                sampling_rate=sample["sampling_rate"],
                return_tensors="pt",
            )
            features["input_features"] = features["input_features"].to(self.device)

            device_sync(self.device)
            start = time.perf_counter()
            with self.stance:
                self.model.generate(**features)
            device_sync(self.device)
            elapsed = time.perf_counter() - start

            if i == 0:
                # Keep the first iteration as elapsed seconds (not as a
                # real-time factor) so compilation_time has the same unit as
                # in the text-generation benchmark.
                first_iteration = elapsed

            audio_seconds = len(sample["array"]) / sample["sampling_rate"]
            total_real_time_factor += elapsed / audio_seconds

        avg_real_time_factor = total_real_time_factor / N_ITER
        return Experiment(
            name=self.model_name,
            dtype=str(self.dtype),
            device=self.device,
            arch=get_arch_name(),
            test_config=self.test_config,
            compilation_time=first_iteration if self.test_config == "default" else 0,
            real_time_factor=avg_real_time_factor,
        )
class WhisperBenchmark(ASRBenchmark):
    """ASR benchmark for Whisper checkpoints on the google/fleurs en_us validation split."""

    def get_model_and_inputs(self):
        """Load the dataset, the compiled Whisper model and its processor."""
        self.dtype = torch.float32
        self.inputs = load_dataset("google/fleurs", "en_us", split="validation")

        whisper = WhisperForConditionalGeneration.from_pretrained(self.model_name)
        whisper = whisper.to(self.device)
        # Only forward() is wrapped by torch.compile; generate() stays as-is.
        whisper.forward = torch.compile(whisper.forward)
        whisper.config.forced_decoder_ids = None

        self.processor = WhisperProcessor.from_pretrained(self.model_name)
        self.model = whisper
class TextGenerationBenchmark(Benchmark):
    """Benchmark for causal language models: tokens/s and memory bandwidth."""

    def get_model_and_inputs(self):
        """Load tokenizer and compiled model, and build the generate() kwargs.

        ``self.inputs`` ends up holding the tokenized prompt plus a
        ``generation_config`` entry, so it can be splatted into
        ``model.generate(**self.inputs)``.
        """
        self.dtype = torch.bfloat16
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=self.dtype,
        ).to(self.device)
        model.forward = torch.compile(model.forward)
        self.model = model

        batch_size = batch_size_combinations[0]  # batch size = 1 right now
        max_new_tokens = max_new_token_combinations[0]

        # Build a real batch (a list of prompts). Multiplying the string would
        # instead concatenate the article into one longer prompt for any batch
        # size above 1.
        inputs = self.tokenizer(
            [FRANCE_ARTICLE] * batch_size, return_tensors="pt"
        ).to(self.device)

        generation_config = copy.deepcopy(model.generation_config)
        generation_config.update(
            max_new_tokens=max_new_tokens,
            # min == max pins the number of generated tokens per call.
            min_new_tokens=max_new_tokens,
            eos_token_id=None,
            do_sample=False,
            # cache_implementation=cache_implementation_combinations[0],
        )
        inputs["generation_config"] = generation_config
        self.inputs = inputs

    def run_inference(self):
        """Run ``N_ITER`` generate() calls and return the averaged result.

        Tokens per second is the number of newly generated tokens of the first
        sequence divided by wall-clock time; memory bandwidth is
        ``model_size * tokens_per_second / 1e9``. Both are averaged over all
        iterations (including the first one).
        """
        model_size = _get_model_size(self.model)
        # Read the prompt length from the tensor shape; integer indexing of the
        # tokenizer output is not available with every tokenizer backend.
        prompt_length = self.inputs["input_ids"].shape[-1]
        torch.compiler.reset()
        first_iteration = 0
        total_tokens_per_second = 0
        total_memory_bandwidth = 0
        for i in range(N_ITER):
            device_sync(self.device)
            start = time.perf_counter()
            with self.stance:
                gen_out = self.model.generate(**self.inputs)
            device_sync(self.device)
            elapsed = time.perf_counter() - start

            if i == 0:
                first_iteration = elapsed

            num_tokens = len(gen_out[0]) - prompt_length
            tokens_per_second = num_tokens / elapsed
            total_tokens_per_second += tokens_per_second
            total_memory_bandwidth += model_size * tokens_per_second / 1e9

        avg_tokens_per_second = total_tokens_per_second / N_ITER
        avg_memory_bandwidth = total_memory_bandwidth / N_ITER
        return Experiment(
            name=self.model_name,
            dtype=str(self.dtype),
            device=self.device,
            arch=get_arch_name(),
            test_config=self.test_config,
            compilation_time=first_iteration if self.test_config == "default" else 0,
            tokens_per_second=avg_tokens_per_second,
            memory_bandwidth=avg_memory_bandwidth,
        )
def test_export_aot_inductor():
    """Placeholder that currently does nothing.

    Only referenced from the commented-out "export-aot-inductor" entry of
    ``test_configs``.
    """
    return None
################################################################
# Names of the available test configurations. "eager" makes Benchmark select
# the "force_eager" compiler stance; "default" is the only config for which a
# compilation time is recorded.
# NOTE(review): this is a set, so the order in which configs are swept (and
# listed in --help) is not guaranteed to be stable between runs.
test_configs = {
    "eager",
    "default",
    # "export-aot-inductor": test_export_aot_inductor,
}

View File

@ -0,0 +1,10 @@
from generate import Benchmark, TextGenerationBenchmark, WhisperBenchmark
# Registry mapping a model name to the Benchmark subclass that runs it. The
# values are classes (not instances); the key is passed to the class as its
# model_name.
models: dict[str, type[Benchmark]] = {
    "meta-llama/Llama-3.2-1B": TextGenerationBenchmark,
    "google/gemma-2-2b": TextGenerationBenchmark,
    "google/gemma-3-4b-it": TextGenerationBenchmark,
    "openai/whisper-tiny": WhisperBenchmark,
    "Qwen/Qwen3-0.6B": TextGenerationBenchmark,
}

View File

@ -0,0 +1,69 @@
FRANCE_ARTICLE = (
"<s>Marseille, France (CNN)The French prosecutor leading an investigation into the crash of Germanwings Flight "
"9525 insisted Wednesday that he was not aware of any video footage from on board the plane. Marseille "
'prosecutor Brice Robin told CNN that "so far no videos were used in the crash investigation." He added, "A '
"person who has such a video needs to immediately give it to the investigators.\" Robin's comments follow claims "
"by two magazines, German daily Bild and French Paris Match, of a cell phone video showing the harrowing final "
"seconds from on board Germanwings Flight 9525 as it crashed into the French Alps. All 150 on board were killed. "
"Paris Match and Bild reported that the video was recovered from a phone at the wreckage site. The two "
"publications described the supposed video, but did not post it on their websites. The publications said that "
"they watched the video, which was found by a source close to the investigation. \"One can hear cries of 'My God' "
'in several languages," Paris Match reported. "Metallic banging can also be heard more than three times, '
"perhaps of the pilot trying to open the cockpit door with a heavy object. Towards the end, after a heavy "
'shake, stronger than the others, the screaming intensifies. Then nothing." "It is a very disturbing scene," '
"said Julian Reichelt, editor-in-chief of Bild online. An official with France's accident investigation agency, "
"the BEA, said the agency is not aware of any such video. Lt. Col. Jean-Marc Menichini, a French Gendarmerie "
"spokesman in charge of communications on rescue efforts around the Germanwings crash site, told CNN that the "
'reports were "completely wrong" and "unwarranted." Cell phones have been collected at the site, he said, '
'but that they "hadn\'t been exploited yet." Menichini said he believed the cell phones would need to be sent '
"to the Criminal Research Institute in Rosny sous-Bois, near Paris, in order to be analyzed by specialized "
"technicians working hand-in-hand with investigators. But none of the cell phones found so far have been sent "
"to the institute, Menichini said. Asked whether staff involved in the search could have leaked a memory card "
'to the media, Menichini answered with a categorical "no." Reichelt told "Erin Burnett: Outfront" that he '
'had watched the video and stood by the report, saying Bild and Paris Match are "very confident" that the clip '
"is real. He noted that investigators only revealed they'd recovered cell phones from the crash site after "
'Bild and Paris Match published their reports. "That is something we did not know before. ... Overall we can '
"say many things of the investigation weren't revealed by the investigation at the beginning,\" he said. What "
"was mental state of Germanwings co-pilot? German airline Lufthansa confirmed Tuesday that co-pilot Andreas "
"Lubitz had battled depression years before he took the controls of Germanwings Flight 9525, which he's "
"accused of deliberately crashing last week in the French Alps. Lubitz told his Lufthansa flight training "
'school in 2009 that he had a "previous episode of severe depression," the airline said Tuesday. Email '
"correspondence between Lubitz and the school discovered in an internal investigation, Lufthansa said, "
"included medical documents he submitted in connection with resuming his flight training. The announcement "
"indicates that Lufthansa, the parent company of Germanwings, knew of Lubitz's battle with depression, allowed "
"him to continue training and ultimately put him in the cockpit. Lufthansa, whose CEO Carsten Spohr previously "
'said Lubitz was 100% fit to fly, described its statement Tuesday as a "swift and seamless clarification" and '
"said it was sharing the information and documents -- including training and medical records -- with public "
"prosecutors. Spohr traveled to the crash site Wednesday, where recovery teams have been working for the past "
"week to recover human remains and plane debris scattered across a steep mountainside. He saw the crisis center "
"set up in Seyne-les-Alpes, laid a wreath in the village of Le Vernet, closer to the crash site, where grieving "
"families have left flowers at a simple stone memorial. Menichini told CNN late Tuesday that no visible human "
"remains were left at the site but recovery teams would keep searching. French President Francois Hollande, "
"speaking Tuesday, said that it should be possible to identify all the victims using DNA analysis by the "
"end of the week, sooner than authorities had previously suggested. In the meantime, the recovery of the "
"victims' personal belongings will start Wednesday, Menichini said. Among those personal belongings could be "
"more cell phones belonging to the 144 passengers and six crew on board. Check out the latest from our "
"correspondents . The details about Lubitz's correspondence with the flight school during his training were "
"among several developments as investigators continued to delve into what caused the crash and Lubitz's "
"possible motive for downing the jet. A Lufthansa spokesperson told CNN on Tuesday that Lubitz had a valid "
'medical certificate, had passed all his examinations and "held all the licenses required." Earlier, a '
"spokesman for the prosecutor's office in Dusseldorf, Christoph Kumpa, said medical records reveal Lubitz "
"suffered from suicidal tendencies at some point before his aviation career and underwent psychotherapy before "
"he got his pilot's license. Kumpa emphasized there's no evidence suggesting Lubitz was suicidal or acting "
"aggressively before the crash. Investigators are looking into whether Lubitz feared his medical condition "
"would cause him to lose his pilot's license, a European government official briefed on the investigation told "
'CNN on Tuesday. While flying was "a big part of his life," the source said, it\'s only one theory being '
"considered. Another source, a law enforcement official briefed on the investigation, also told CNN that "
"authorities believe the primary motive for Lubitz to bring down the plane was that he feared he would not "
"be allowed to fly because of his medical problems. Lubitz's girlfriend told investigators he had seen an eye "
"doctor and a neuropsychologist, both of whom deemed him unfit to work recently and concluded he had "
"psychological issues, the European government official said. But no matter what details emerge about his "
"previous mental health struggles, there's more to the story, said Brian Russell, a forensic psychologist. "
'"Psychology can explain why somebody would turn rage inward on themselves about the fact that maybe they '
"weren't going to keep doing their job and they're upset about that and so they're suicidal,\" he said. \"But "
"there is no mental illness that explains why somebody then feels entitled to also take that rage and turn it "
"outward on 149 other people who had nothing to do with the person's problems.\" Germanwings crash compensation: "
"What we know . Who was the captain of Germanwings Flight 9525? CNN's Margot Haddad reported from Marseille and "
"Pamela Brown from Dusseldorf, while Laura Smith-Spark wrote from London. CNN's Frederik Pleitgen, Pamela "
"Boykoff, Antonia Mortensen, Sandrine Amiel and Anna-Maja Rappard contributed to this report."
)