[BE] fix typos in benchmarks/ (#156077)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156077
Approved by: https://github.com/Skylion007, https://github.com/malfet
ghstack dependencies: #156069
Commit: 42015db6a9 (parent: 0a0023d984)
Author: Xuehai Pan
Date: 2025-06-17 09:54:39 +08:00
Committed by: PyTorch MergeBot
22 changed files with 97 additions and 97 deletions

View File

@@ -1159,7 +1159,6 @@ exclude_patterns = [
     # These files are all grandfathered in, feel free to remove from this list
     # as necessary
     'aten/**',
-    'benchmarks/**',
     'c10/**',
     'cmake/**',
     'docs/**',

View File

@@ -6,7 +6,7 @@ import sys
 # Note - hf and timm have their own version of this, torchbench does not
-# TOOD(voz): Someday, consolidate all the files into one runner instead of a shim like this...
+# TODO(voz): Someday, consolidate all the files into one runner instead of a shim like this...
 def model_names(filename: str) -> set[str]:
     names = set()
     with open(filename) as fh:

View File

@@ -1,5 +1,5 @@
 """
-Update commited CSV files used as reference points by dynamo/inductor CI.
+Update committed CSV files used as reference points by dynamo/inductor CI.
 Currently only cares about graph breaks, so only saves those columns.

View File

@@ -67,7 +67,7 @@ try:
     import torch_xla
     import torch_xla.core.xla_model as xm
-    # This is to woraround the backward issue https://github.com/pytorch/xla/issues/4174
+    # This is to workaround the backward issue https://github.com/pytorch/xla/issues/4174
     torch_xla._XLAC._init_computation_client()
 except ImportError:
     # ignore the error if torch_xla is not installed
@@ -270,7 +270,7 @@ DO_NOT_CAST_INPUTS = {"stable_diffusion"}
 # Maps a benchmark model name to a list of status codes. For any listed entry, we'll
-# capture TORCH_COMPILE_DEBUG logs in CI runs and preseve them (i.e., for upload) if
+# capture TORCH_COMPILE_DEBUG logs in CI runs and preserve them (i.e., for upload) if
 # the result status matches one listed.
 CI_PRESERVE_COMPILE_DEBUG = {
     # For example:
@@ -1074,7 +1074,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
     times = args.iterations_per_run
-    # Use higher tolerance for XLA since XLA cause numerical unstability when
+    # Use higher tolerance for XLA since XLA cause numerical instability when
     # graph size changes
     tolerance = args.xla_tolerance if args.trace_on_xla else 1e-4
     torch._dynamo.config.repro_tolerance = tolerance
@@ -1680,7 +1680,7 @@ class BenchmarkRunner:
         devices = [current_device] if current_device else self.args.devices
         if self.args.amp:
-            # AMP training can lead to small loss values which can undeflow
+            # AMP training can lead to small loss values which can underflow
             # gradient values returning in zero gradients. To solve this
             # problem, PyTorch introduces GradScaler. GradScaler is a stateful
             # structure, that scales the loss values to prevent underflow. Loss
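For context, the GradScaler pattern this comment describes looks roughly like the following standalone sketch (the model, data, and hyperparameters are illustrative, not taken from the runner; a CUDA device is assumed):

import torch

model = torch.nn.Linear(8, 8).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.amp.GradScaler("cuda")  # stateful: tracks a running scale factor

for _ in range(3):
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = model(torch.randn(4, 8, device="cuda")).sum()
    scaler.scale(loss).backward()  # backward on the scaled loss to avoid underflow
    scaler.step(optimizer)         # unscales grads; skips the step on inf/NaN
    scaler.update()                # adapts the scale factor for the next iteration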
@@ -1718,7 +1718,7 @@ class BenchmarkRunner:
             self.optimizer = torch.optim.SGD(params, lr=0.01, foreach=True)
             # Disable multi_tensor_sgd for benchmarking, there isn't a large performance benefit (~1%) to compiling
             # this optimizer because it is a single foreach add, and increases compile time.
-            # After autotuning and fake tensor caching lands, we can enable, becuase the compile time impact will be lower.
+            # After autotuning and fake tensor caching lands, we can enable, because the compile time impact will be lower.
             # Fake Tensor caching: https://github.com/pytorch/pytorch/pull/113873
             # Autotuning: https://github.com/pytorch/pytorch/issues/117447
             self.optimizer.step = torch._dynamo.disable(self.optimizer.step)
@@ -2823,7 +2823,7 @@ class BenchmarkRunner:
             )
             # NB: Don't upload them to the benchmark database as they are debugging
-            # infomation. There are also around a million records a day which is
+            # information. There are also around a million records a day which is
             # wasteful to store
             write_outputs(
                 filename,
@@ -2881,7 +2881,7 @@ def parse_args(args=None):
     iterations_per_run_help = """
 Run this may iterations for each time measurement. This is mainly used for
 XLA training. We want to run multiple iterations per measurement so the
-tracing and computation for different iteartions can overlap with each
+tracing and computation for different iterations can overlap with each
 other. This makes sure we have an accurate xla baseline.
 """
     parser.add_argument(
@@ -3040,7 +3040,7 @@ def parse_args(args=None):
     parser.add_argument(
         "--generate-aot-autograd-stats",
         action="store_true",
-        help="Generates AOT Autograd stats like how mnay graphs are sent to AOT",
+        help="Generates AOT Autograd stats like how many graphs are sent to AOT",
     )
     parser.add_argument(
         "--inductor-settings",
@@ -3261,7 +3261,7 @@ def parse_args(args=None):
         "--warm-start-latency",
         "--warm_start_latency",
         action="store_true",
-        help="Run model(s) twice and preseve caches in between to enable a 'warm start' on the 2nd run",
+        help="Run model(s) twice and preserve caches in between to enable a 'warm start' on the 2nd run",
     )
     group_fuser = parser.add_mutually_exclusive_group()
@@ -3610,7 +3610,7 @@ def run(runner, args, original_dir=None):
         torch.backends.mkldnn.deterministic = True
-        # Remove randomeness when torch manual seed is called
+        # Remove randomness when torch manual seed is called
         patch_torch_manual_seed()
         # Some models e.g. yolov3 assert batch size on n_gpus

View File

@@ -274,7 +274,7 @@ class OperatorInputsLoader:
             yield
             return
-        # line[1] represents number of times these inputs occured, ignored for now
+        # line[1] represents number of times these inputs occurred, ignored for now
         for line in self.operator_db[str(operator)].items():
             inps = line[0]

View File

@@ -269,7 +269,7 @@ def parse_args():
         "--no-graphs",
         action="store_true",
         default=False,
-        help="Do not genenerate and upload metric graphs",
+        help="Do not generate and upload metric graphs",
     )
     parser.add_argument(
         "--no-update-archive",
@@ -368,7 +368,7 @@ def get_mode(args):
 def get_skip_tests(suite, device, is_training: bool):
     """
-    Generate -x seperated string to skip the unusual setup training tests
+    Generate -x separated string to skip the unusual setup training tests
     """
     skip_tests = set()
     original_dir = abspath(os.getcwd())
@@ -1359,7 +1359,7 @@ class DashboardUpdater:
         dtype = self.args.dtypes[0]
         day, _ = archive_data(self.args.archive_name)
         target_dir = get_archive_name(self.args, dtype)
-        # Update lookup csv the folder to arhived logs
+        # Update lookup csv the folder to archived logs
         subprocess.check_call(
             f'echo "{day},performance,{dtype},{target_dir}" >> {self.lookup_file}',
             shell=True,
@@ -1418,7 +1418,7 @@ class DashboardUpdater:
     def comment_on_gh(self, comment):
         """
-        Send a commment to dashboard
+        Send a comment to dashboard
         """
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
             f.write(comment)

View File

@@ -229,7 +229,7 @@ skip:
   - doctr_det_predictor
   - doctr_reco_predictor
   - moondream
-  # doesnt fit in memory
+  # doesn't fit in memory
   - phi_1_5
   - detectron2_fcos_r_50_fpn

View File

@@ -225,7 +225,7 @@ def varlen_lstm_inputs(
         return x, lengths, (hx, cx), lstm.all_weights, lstm
     else:
         # NB: lstm.all_weights format:
-        #     wih, whh, bih, bhh = lstm.all_weights[layer]
+        #     w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer]
         return x, lengths, (hx, cx), lstm.all_weights, None
@@ -266,10 +266,10 @@ def varlen_lstm_factory(cell, script):
     def dynamic_rnn(
         sequences: list[Tensor],
         hiddens: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[list[Tensor], tuple[list[Tensor], list[Tensor]]]:
         hx, cx = hiddens
         hxs = hx.unbind(1)
@@ -286,7 +286,7 @@ def varlen_lstm_factory(cell, script):
             for seq_idx in range(len(inputs)):
                 hy, cy = cell(
-                    inputs[seq_idx].unsqueeze(0), (hy, cy), wih, whh, bih, bhh
+                    inputs[seq_idx].unsqueeze(0), (hy, cy), w_ih, w_hh, b_ih, b_hh
                 )
                 output += [hy]
             outputs += [torch.stack(output)]
@@ -315,7 +315,7 @@ def varlen_lstm_creator(script=False, **kwargs):
 # cudnn_layernorm_lstm: since cudnn does not have Layernorm LSTM, we cannot benchmark
-# the lowerbound directly. Instead, we only benchmark the forward pass by mimicing the
+# the lowerbound directly. Instead, we only benchmark the forward pass by mimicking the
 # computation of a cudnn lstm + seq_len * 3 layernorm computation. This should serve
 # as a perf lowerbound for the Layernorm LSTM forward pass(given that Layernorm itself
 # is invariant), the lowerbound of backward pass is hard to get since we lose the
@@ -352,12 +352,12 @@ def layernorm_pytorch_lstm_creator(**kwargs):
     )
-# input: lstm.all_weights format (wih, whh, bih, bhh = lstm.all_weights[layer])
+# input: lstm.all_weights format (w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer])
 # output: packed_weights with format
-# packed_weights[0] is wih with size (layer, 4*hiddenSize, inputSize)
-# packed_weights[1] is whh with size (layer, 4*hiddenSize, hiddenSize)
-# packed_weights[2] is bih with size (layer, 4*hiddenSize)
-# packed_weights[3] is bhh with size (layer, 4*hiddenSize)
+# packed_weights[0] is w_ih with size (layer, 4*hiddenSize, inputSize)
+# packed_weights[1] is w_hh with size (layer, 4*hiddenSize, hiddenSize)
+# packed_weights[2] is b_ih with size (layer, 4*hiddenSize)
+# packed_weights[3] is b_hh with size (layer, 4*hiddenSize)
 def stack_weights(weights):
     def unzip_columns(mat):
         assert isinstance(mat, list)
@@ -398,7 +398,7 @@ def lstm_inputs(
         return x, (hx, cx), lstm.all_weights, lstm
     else:
         # NB: lstm.all_weights format:
-        #     wih, whh, bih, bhh = lstm.all_weights[layer]
+        #     w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer]
         return x, (hx, cx), lstm.all_weights, None
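For reference, a minimal sketch (not part of the diff) of the `all_weights` layout these comments rely on; the sizes follow the documented `torch.nn.LSTM` parameter shapes:

import torch.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=16, num_layers=2)
for layer, (w_ih, w_hh, b_ih, b_hh) in enumerate(lstm.all_weights):
    in_features = 8 if layer == 0 else 16          # layer 0 sees the raw input
    assert w_ih.shape == (4 * 16, in_features)     # 4 gates stacked: i, f, g, o
    assert w_hh.shape == (4 * 16, 16)
    assert b_ih.shape == b_hh.shape == (4 * 16,)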
@@ -406,17 +406,17 @@ def lstm_factory(cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
         inputs = input.unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = cell(inputs[seq_idx], (hy, cy), wih, whh, bih, bhh)
+            hy, cy = cell(inputs[seq_idx], (hy, cy), w_ih, w_hh, b_ih, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))
@@ -432,17 +432,17 @@ def lstm_factory_premul(premul_cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
-        inputs = torch.matmul(input, wih.t()).unbind(0)
+        inputs = torch.matmul(input, w_ih.t()).unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), whh, bih, bhh)
+            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), w_hh, b_ih, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))
@@ -458,10 +458,10 @@ def lstm_factory_premul_bias(premul_cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
@@ -470,11 +470,11 @@ def lstm_factory_premul_bias(premul_cell, script):
         # FIXME matmul(x,y) + bias currently goes through jit AD, and backward formula in AD is not optimized for this
         # case. Workaround with mm and views.
         inpSize = input.size()
-        inputs = torch.mm(input.view(-1, inpSize[2]), wih.t()) + bih
+        inputs = torch.mm(input.view(-1, inpSize[2]), w_ih.t()) + b_ih
         inputs = inputs.view(inpSize[0], inpSize[1], -1).unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), whh, bhh)
+            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), w_hh, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))
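To illustrate the mm-and-views workaround in the hunk above: both forms compute the same batched affine projection of the input, one via a 3-D matmul and one via a flattened 2-D mm (the shapes below are invented for the sketch):

import torch

x = torch.randn(5, 3, 8)   # (seq_len, batch, input_size)
w_ih = torch.randn(32, 8)  # (4 * hidden_size, input_size)
b_ih = torch.randn(32)

y_matmul = torch.matmul(x, w_ih.t()) + b_ih                        # direct form
y_mm = (torch.mm(x.view(-1, 8), w_ih.t()) + b_ih).view(5, 3, -1)   # workaround
assert torch.allclose(y_matmul, y_mm, atol=1e-6)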
@@ -488,12 +488,12 @@ def lstm_factory_premul_bias(premul_cell, script):
 # simple: flat inputs (no tuples), no list to accumulate outputs
 # useful mostly for benchmarking older JIT versions
 def lstm_factory_simple(cell, script):
-    def dynamic_rnn(input, hx, cx, wih, whh, bih, bhh):
+    def dynamic_rnn(input, hx, cx, w_ih, w_hh, b_ih, b_hh):
         hy = hx  # for scoping
         cy = cx  # for scoping
         inputs = input.unbind(0)
         for seq_idx in range(len(inputs)):
-            hy, cy = cell(inputs[seq_idx], hy, cy, wih, whh, bih, bhh)
+            hy, cy = cell(inputs[seq_idx], hy, cy, w_ih, w_hh, b_ih, b_hh)
         return hy, cy

     if script:
@@ -515,12 +515,12 @@ def lstm_factory_multilayer(cell, script):
             hy = hx[layer]
             cy = cx[layer]
             base_idx = layer * params_stride
-            wih = params[base_idx]
-            whh = params[base_idx + 1]
-            bih = params[base_idx + 2]
-            bhh = params[base_idx + 3]
+            w_ih = params[base_idx]
+            w_hh = params[base_idx + 1]
+            b_ih = params[base_idx + 2]
+            b_hh = params[base_idx + 3]
             for seq_idx in range(len(inputs)):
-                hy, cy = cell(inputs[seq_idx], (hy, cy), wih, whh, bih, bhh)
+                hy, cy = cell(inputs[seq_idx], (hy, cy), w_ih, w_hh, b_ih, b_hh)
                 outputs += [hy]
             inputs, outputs = outputs, []
         return torch.stack(inputs), (hy.unsqueeze(0), cy.unsqueeze(0))

View File

@@ -51,34 +51,34 @@ def test_rnns(
     print("Setting up...")
     control = control_creator(**creator_args)
-    experim = experim_creator(**creator_args)
+    experiment = experim_creator(**creator_args)

     # Precondition
-    assertEqual(experim.inputs, control.inputs)
-    assertEqual(experim.params, control.params)
+    assertEqual(experiment.inputs, control.inputs)
+    assertEqual(experiment.params, control.params)

     print("Checking outputs...")
     control_outputs = control.forward(*control.inputs)
-    experim_outputs = experim.forward(*experim.inputs)
+    experim_outputs = experiment.forward(*experiment.inputs)
     assertEqual(experim_outputs, control_outputs)

     print("Checking grads...")
     assert control.backward_setup is not None
-    assert experim.backward_setup is not None
+    assert experiment.backward_setup is not None
     assert control.backward is not None
-    assert experim.backward is not None
+    assert experiment.backward is not None
     control_backward_inputs = control.backward_setup(control_outputs, seed)
-    experim_backward_inputs = experim.backward_setup(experim_outputs, seed)
+    experim_backward_inputs = experiment.backward_setup(experim_outputs, seed)
     control.backward(*control_backward_inputs)
-    experim.backward(*experim_backward_inputs)
+    experiment.backward(*experim_backward_inputs)
     control_grads = [p.grad for p in control.params]
-    experim_grads = [p.grad for p in experim.params]
+    experim_grads = [p.grad for p in experiment.params]
     assertEqual(experim_grads, control_grads)

     if verbose:
-        print(experim.forward.graph_for(*experim.inputs))
+        print(experiment.forward.graph_for(*experiment.inputs))
     print()
@@ -103,16 +103,16 @@ def test_vl_py(**test_args):
     print("Setting up...")
     control = control_creator(**creator_args)
-    experim = experim_creator(**creator_args)
+    experiment = experim_creator(**creator_args)

     # Precondition
-    assertEqual(experim.inputs, control.inputs[:2])
-    assertEqual(experim.params, control.params)
+    assertEqual(experiment.inputs, control.inputs[:2])
+    assertEqual(experiment.params, control.params)

     print("Checking outputs...")
     control_out, control_hiddens = control.forward(*control.inputs)
     control_hx, control_cx = control_hiddens
-    experim_out, experim_hiddens = experim.forward(*experim.inputs)
+    experim_out, experim_hiddens = experiment.forward(*experiment.inputs)
     experim_hx, experim_cx = experim_hiddens

     experim_padded = nn.utils.rnn.pad_sequence(experim_out).squeeze(-2)
@@ -122,25 +122,25 @@ def test_vl_py(**test_args):
     print("Checking grads...")
     assert control.backward_setup is not None
-    assert experim.backward_setup is not None
+    assert experiment.backward_setup is not None
     assert control.backward is not None
-    assert experim.backward is not None
+    assert experiment.backward is not None
     control_backward_inputs = control.backward_setup(
         (control_out, control_hiddens), test_args["seed"]
     )
-    experim_backward_inputs = experim.backward_setup(
+    experim_backward_inputs = experiment.backward_setup(
         (experim_out, experim_hiddens), test_args["seed"]
     )
     control.backward(*control_backward_inputs)
-    experim.backward(*experim_backward_inputs)
+    experiment.backward(*experim_backward_inputs)
     control_grads = [p.grad for p in control.params]
-    experim_grads = [p.grad for p in experim.params]
+    experim_grads = [p.grad for p in experiment.params]
     assertEqual(experim_grads, control_grads)

     if test_args["verbose"]:
-        print(experim.forward.graph_for(*experim.inputs))
+        print(experiment.forward.graph_for(*experiment.inputs))
     print()

View File

@@ -885,7 +885,7 @@ class HungarianMatcher(nn.Module):
         self.cost_bbox = cost_bbox
         self.cost_giou = cost_giou
         assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, (
-            "all costs cant be 0"
+            "all costs can't be 0"
         )

     @torch.no_grad()
@@ -920,13 +920,13 @@ class HungarianMatcher(nn.Module):
         # Compute the classification cost. Contrary to the loss, we don't use the NLL,
         # but approximate it in 1 - proba[target class].
-        # The 1 is a constant that doesn't change the matching, it can be ommitted.
+        # The 1 is a constant that doesn't change the matching, it can be omitted.
         cost_class = -out_prob[:, tgt_ids]

         # Compute the L1 cost between boxes
         cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)

-        # Compute the giou cost betwen boxes
+        # Compute the giou cost between boxes
         cost_giou = -generalized_box_iou(
             box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
         )
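As a quick aside, the pairwise L1 cost term above can be sketched standalone (the tensors and sizes here are invented for illustration; the full matcher also adds the class and gIoU terms before running the Hungarian assignment):

import torch

out_bbox = torch.rand(10, 4)  # 10 predicted boxes in (cx, cy, w, h)
tgt_bbox = torch.rand(3, 4)   # 3 ground-truth boxes
cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)  # (10, 3) pairwise L1 costs
assert cost_bbox.shape == (10, 3)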

View File

@@ -44,7 +44,7 @@ def device_sync(device):
     elif "cpu" in device:
         pass
     else:
-        print(f"device={device} is not yet suppported")
+        print(f"device={device} is not yet supported")

 def get_arch_name() -> str:

View File

@@ -20,7 +20,7 @@ For now we omit data preprocessing as well as result post-processing.
 ### Running a single benchmark
-The togglable commmand line arguments to the script are as follows:
+The togglable command line arguments to the script are as follows:
 - `num_iters` (default: 100): how many requests to send to the backend
   excluding the first warmup request
 - `batch_size` (default: 32): the batch size of the requests.

View File

@@ -45,7 +45,7 @@ class FrontendWorker(mp.Process):
         """
         This function will poll the response queue until it has received all
         responses. It records the startup latency, the average, max, min latency
-        as well as througput of requests.
+        as well as throughput of requests.
         """
         warmup_response_time = None
         response_times = []

View File

@@ -55,7 +55,7 @@ def main(argv: list[str]) -> None:
     results = Runner(work_orders, cadence=30.0).run()
-    # TODO: Annotate with TypedDict when 3.8 is the minimum supported verson.
+    # TODO: Annotate with TypedDict when 3.8 is the minimum supported version.
     grouped_results: dict[str, dict[str, list[Union[float, int]]]] = {
         key: {"times": [], "counts": []} for key in keys
     }

View File

@@ -2,7 +2,7 @@
 The contents of this file are placeholders, and will be replaced by more
 expressive and robust components (e.g. better runner and result display
-components) in future iterations. However this allows us to excercise the
+components) in future iterations. However this allows us to exercise the
 underlying benchmark generation infrastructure in the mean time.
 """

View File

@@ -561,7 +561,7 @@ class BenchmarkRunner:
         output_csv_filename = self.args.output_csv
         headers = [
             "Benchmarking Framework",
-            "Benchamrking Module Name",
+            "Benchmarking Module Name",
             "Case Name",
             "tag",
             "run_backward",

View File

@@ -134,14 +134,14 @@ def _validate(configs):
 def config_list(**configs):
     """Generate configs based on the list of input shapes.
     This function will take input shapes specified in a list from user. Besides
-    that, all other parameters will be cross producted first and each of the
+    that, all other parameters will be cross produced first and each of the
     generated list will be merged with the input shapes list.
     Reserved Args:
         attr_names(reserved): a list of names for input shapes.
         attrs(reserved): a list of values for each input shape.
         corss_product: a dictionary of attributes which will be
-            cross producted with the input shapes.
+            cross produced with the input shapes.
         tags(reserved): a tag used to filter inputs.

     Here is an example:

View File

@@ -1,4 +1,4 @@
-Benchmarking Framework,Benchamrking Module Name,Case Name,tag,run_backward,Execution Time
+Benchmarking Framework,Benchmarking Module Name,Case Name,tag,run_backward,Execution Time
 PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,3.9497
 PyTorch,add,add_M64_N64_K64_cpu,short,FALSE,14.3181
 PyTorch,add,add_M64_N64_K128_cpu,short,FALSE,14.6826


View File

@@ -89,7 +89,7 @@ class Benchmark:
     @staticmethod
     def default_configs():
-        """return a list of defualt configs for this benchmark"""
+        """return a list of default configs for this benchmark"""
        raise ValueError("this method should be reimplemented by subclass")

     def is_supported(self):

View File

@@ -271,9 +271,9 @@ def run_single_backend_sdpa(
     if config.calculate_bwd_time:
         # TODO: debug backward pass for njt
         if eager_sdpa and not config.attn_type == "document_mask":
-            dOut = torch.randn_like(out_eager.transpose(1, 2)).transpose(1, 2)
+            d_out = torch.randn_like(out_eager.transpose(1, 2)).transpose(1, 2)
             backward_eager_time = benchmark_torch_function_in_microseconds(
-                out_eager.backward, dOut, retain_graph=True
+                out_eager.backward, d_out, retain_graph=True
             )
         else:
             backward_eager_time = float("nan")
@@ -340,9 +340,9 @@ def run_single_backend_FA(
     if config.calculate_bwd_time:
         if FA:
-            dOut = torch.randn_like(out_FA)
+            d_out = torch.randn_like(out_FA)
             backward_FA_time = benchmark_torch_function_in_microseconds(
-                out_FA.backward, dOut, retain_graph=True
+                out_FA.backward, d_out, retain_graph=True
             )
         else:
             backward_FA_time = float("nan")
@@ -432,9 +432,9 @@ def run_single_experiment(
     )
     if config.calculate_bwd_time:
-        dOut = torch.randn_like(out_compile)
+        d_out = torch.randn_like(out_compile)
         backward_compile_time = benchmark_torch_function_in_microseconds(
-            out_compile.backward, dOut, retain_graph=True
+            out_compile.backward, d_out, retain_graph=True
         )
     sparsity = block_mask.sparsity() / 100.0 if block_mask is not None else 0.0
     sparsity = sparsity if config.attn_type != "document_mask" else 0.5

View File

@@ -172,9 +172,9 @@ def run_single_experiment(config: ExperimentConfig) -> ExperimentResults:
         out_torch = scaled_dot_product_attention(
             q, k, v, is_causal=is_causal, attn_mask=None
         )
-        dOut = torch.randn_like(out_torch)
+        d_out = torch.randn_like(out_torch)
         backward_time = benchmark_cuda_function_in_microseconds(
-            out_torch.backward, dOut, retain_graph=True
+            out_torch.backward, d_out, retain_graph=True
         )

     # Calculate TFLOPS for forward and backward passes
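The backward-only timing pattern in these hunks can be sketched with the public torch.utils.benchmark.Timer instead of the repo's internal helpers (the tensors and sizes below are illustrative):

import torch
from torch.utils.benchmark import Timer

x = torch.randn(64, 64, requires_grad=True)
out = x @ x                     # non-scalar output, so backward needs a grad
d_out = torch.randn_like(out)

timer = Timer(
    stmt="out.backward(d_out, retain_graph=True)",  # retain_graph lets the
    globals={"out": out, "d_out": d_out},           # measurement repeat
)
print(timer.timeit(100))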

View File

@@ -1,4 +1,5 @@
 coo
+fro
 hsa
 nd
 optins