[BE] fix typos in benchmarks/ (#156077)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/156077
Approved by: https://github.com/Skylion007, https://github.com/malfet
ghstack dependencies: #156069
Committed by: PyTorch MergeBot
Parent: 0a0023d984
Commit: 42015db6a9
@@ -1159,7 +1159,6 @@ exclude_patterns = [
     # These files are all grandfathered in, feel free to remove from this list
     # as necessary
     'aten/**',
-    'benchmarks/**',
     'c10/**',
     'cmake/**',
     'docs/**',
@@ -6,7 +6,7 @@ import sys


 # Note - hf and timm have their own version of this, torchbench does not
-# TOOD(voz): Someday, consolidate all the files into one runner instead of a shim like this...
+# TODO(voz): Someday, consolidate all the files into one runner instead of a shim like this...
 def model_names(filename: str) -> set[str]:
     names = set()
     with open(filename) as fh:
@@ -1,5 +1,5 @@
 """
-Update commited CSV files used as reference points by dynamo/inductor CI.
+Update committed CSV files used as reference points by dynamo/inductor CI.

 Currently only cares about graph breaks, so only saves those columns.

@@ -67,7 +67,7 @@ try:
     import torch_xla
     import torch_xla.core.xla_model as xm

-    # This is to woraround the backward issue https://github.com/pytorch/xla/issues/4174
+    # This is to workaround the backward issue https://github.com/pytorch/xla/issues/4174
     torch_xla._XLAC._init_computation_client()
 except ImportError:
     # ignore the error if torch_xla is not installed
@@ -270,7 +270,7 @@ DO_NOT_CAST_INPUTS = {"stable_diffusion"}


 # Maps a benchmark model name to a list of status codes. For any listed entry, we'll
-# capture TORCH_COMPILE_DEBUG logs in CI runs and preseve them (i.e., for upload) if
+# capture TORCH_COMPILE_DEBUG logs in CI runs and preserve them (i.e., for upload) if
 # the result status matches one listed.
 CI_PRESERVE_COMPILE_DEBUG = {
     # For example:
@@ -1074,7 +1074,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):

     times = args.iterations_per_run

-    # Use higher tolerance for XLA since XLA cause numerical unstability when
+    # Use higher tolerance for XLA since XLA cause numerical instability when
     # graph size changes
     tolerance = args.xla_tolerance if args.trace_on_xla else 1e-4
     torch._dynamo.config.repro_tolerance = tolerance
@@ -1680,7 +1680,7 @@ class BenchmarkRunner:

         devices = [current_device] if current_device else self.args.devices
         if self.args.amp:
-            # AMP training can lead to small loss values which can undeflow
+            # AMP training can lead to small loss values which can underflow
             # gradient values returning in zero gradients. To solve this
             # problem, PyTorch introduces GradScaler. GradScaler is a stateful
             # structure, that scales the loss values to prevent underflow. Loss
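For readers unfamiliar with the GradScaler pattern this comment describes, here is a minimal hedged sketch of an AMP training step; `model`, `loss_fn`, `optimizer`, and `loader` are hypothetical placeholders, not names from this diff:

    import torch

    scaler = torch.cuda.amp.GradScaler()
    for inputs, targets in loader:
        optimizer.zero_grad()
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            loss = loss_fn(model(inputs), targets)
        scaler.scale(loss).backward()  # scale the loss so small grads don't underflow in fp16
        scaler.step(optimizer)         # unscales grads first; skips the step on inf/nan
        scaler.update()                # adjusts the scale factor for the next iteration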
@@ -1718,7 +1718,7 @@ class BenchmarkRunner:
         self.optimizer = torch.optim.SGD(params, lr=0.01, foreach=True)
         # Disable multi_tensor_sgd for benchmarking, there isn't a large performance benefit (~1%) to compiling
         # this optimizer because it is a single foreach add, and increases compile time.
-        # After autotuning and fake tensor caching lands, we can enable, becuase the compile time impact will be lower.
+        # After autotuning and fake tensor caching lands, we can enable, because the compile time impact will be lower.
         # Fake Tensor caching: https://github.com/pytorch/pytorch/pull/113873
         # Autotuning: https://github.com/pytorch/pytorch/issues/117447
         self.optimizer.step = torch._dynamo.disable(self.optimizer.step)
@@ -2823,7 +2823,7 @@ class BenchmarkRunner:
             )

             # NB: Don't upload them to the benchmark database as they are debugging
-            # infomation. There are also around a million records a day which is
+            # information. There are also around a million records a day which is
             # wasteful to store
             write_outputs(
                 filename,
@@ -2881,7 +2881,7 @@ def parse_args(args=None):
     iterations_per_run_help = """
 Run this may iterations for each time measurement. This is mainly used for
 XLA training. We want to run multiple iterations per measurement so the
-tracing and computation for different iteartions can overlap with each
+tracing and computation for different iterations can overlap with each
 other. This makes sure we have an accurate xla baseline.
 """
     parser.add_argument(
@@ -3040,7 +3040,7 @@ def parse_args(args=None):
     parser.add_argument(
         "--generate-aot-autograd-stats",
         action="store_true",
-        help="Generates AOT Autograd stats like how mnay graphs are sent to AOT",
+        help="Generates AOT Autograd stats like how many graphs are sent to AOT",
     )
     parser.add_argument(
         "--inductor-settings",
@@ -3261,7 +3261,7 @@ def parse_args(args=None):
         "--warm-start-latency",
         "--warm_start_latency",
         action="store_true",
-        help="Run model(s) twice and preseve caches in between to enable a 'warm start' on the 2nd run",
+        help="Run model(s) twice and preserve caches in between to enable a 'warm start' on the 2nd run",
     )

     group_fuser = parser.add_mutually_exclusive_group()
@@ -3610,7 +3610,7 @@ def run(runner, args, original_dir=None):

         torch.backends.mkldnn.deterministic = True

-        # Remove randomeness when torch manual seed is called
+        # Remove randomness when torch manual seed is called
         patch_torch_manual_seed()

         # Some models e.g. yolov3 assert batch size on n_gpus
@@ -274,7 +274,7 @@ class OperatorInputsLoader:
             yield
             return

-        # line[1] represents number of times these inputs occured, ignored for now
+        # line[1] represents number of times these inputs occurred, ignored for now
         for line in self.operator_db[str(operator)].items():
             inps = line[0]

@@ -269,7 +269,7 @@ def parse_args():
         "--no-graphs",
         action="store_true",
         default=False,
-        help="Do not genenerate and upload metric graphs",
+        help="Do not generate and upload metric graphs",
     )
     parser.add_argument(
         "--no-update-archive",
@@ -368,7 +368,7 @@ def get_mode(args):

 def get_skip_tests(suite, device, is_training: bool):
     """
-    Generate -x seperated string to skip the unusual setup training tests
+    Generate -x separated string to skip the unusual setup training tests
     """
     skip_tests = set()
     original_dir = abspath(os.getcwd())
@@ -1359,7 +1359,7 @@ class DashboardUpdater:
         dtype = self.args.dtypes[0]
         day, _ = archive_data(self.args.archive_name)
         target_dir = get_archive_name(self.args, dtype)
-        # Update lookup csv the folder to arhived logs
+        # Update lookup csv the folder to archived logs
         subprocess.check_call(
             f'echo "{day},performance,{dtype},{target_dir}" >> {self.lookup_file}',
             shell=True,
@@ -1418,7 +1418,7 @@ class DashboardUpdater:

     def comment_on_gh(self, comment):
         """
-        Send a commment to dashboard
+        Send a comment to dashboard
         """
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
             f.write(comment)
@@ -229,7 +229,7 @@ skip:
   - doctr_det_predictor
   - doctr_reco_predictor
   - moondream
-  # doesnt fit in memory
+  # doesn't fit in memory
   - phi_1_5
   - detectron2_fcos_r_50_fpn

@@ -225,7 +225,7 @@ def varlen_lstm_inputs(
         return x, lengths, (hx, cx), lstm.all_weights, lstm
     else:
         # NB: lstm.all_weights format:
-        # wih, whh, bih, bhh = lstm.all_weights[layer]
+        # w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer]
         return x, lengths, (hx, cx), lstm.all_weights, None


@@ -266,10 +266,10 @@ def varlen_lstm_factory(cell, script):
     def dynamic_rnn(
         sequences: list[Tensor],
         hiddens: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[list[Tensor], tuple[list[Tensor], list[Tensor]]]:
         hx, cx = hiddens
         hxs = hx.unbind(1)
@@ -286,7 +286,7 @@ def varlen_lstm_factory(cell, script):

             for seq_idx in range(len(inputs)):
                 hy, cy = cell(
-                    inputs[seq_idx].unsqueeze(0), (hy, cy), wih, whh, bih, bhh
+                    inputs[seq_idx].unsqueeze(0), (hy, cy), w_ih, w_hh, b_ih, b_hh
                 )
                 output += [hy]
             outputs += [torch.stack(output)]
@@ -315,7 +315,7 @@ def varlen_lstm_creator(script=False, **kwargs):


 # cudnn_layernorm_lstm: since cudnn does not have Layernorm LSTM, we cannot benchmark
-# the lowerbound directly. Instead, we only benchmark the forward pass by mimicing the
+# the lowerbound directly. Instead, we only benchmark the forward pass by mimicking the
 # computation of a cudnn lstm + seq_len * 3 layernorm computation. This should serve
 # as a perf lowerbound for the Layernorm LSTM forward pass(given that Layernorm itself
 # is invariant), the lowerbound of backward pass is hard to get since we lose the
@@ -352,12 +352,12 @@ def layernorm_pytorch_lstm_creator(**kwargs):
     )


-# input: lstm.all_weights format (wih, whh, bih, bhh = lstm.all_weights[layer])
+# input: lstm.all_weights format (w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer])
 # output: packed_weights with format
-# packed_weights[0] is wih with size (layer, 4*hiddenSize, inputSize)
-# packed_weights[1] is whh with size (layer, 4*hiddenSize, hiddenSize)
-# packed_weights[2] is bih with size (layer, 4*hiddenSize)
-# packed_weights[3] is bhh with size (layer, 4*hiddenSize)
+# packed_weights[0] is w_ih with size (layer, 4*hiddenSize, inputSize)
+# packed_weights[1] is w_hh with size (layer, 4*hiddenSize, hiddenSize)
+# packed_weights[2] is b_ih with size (layer, 4*hiddenSize)
+# packed_weights[3] is b_hh with size (layer, 4*hiddenSize)
 def stack_weights(weights):
     def unzip_columns(mat):
         assert isinstance(mat, list)
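A hedged sketch of the packing these comments describe: pull each layer's (w_ih, w_hh, b_ih, b_hh) out of `all_weights` and stack them into four tensors with a leading layer dimension (this assumes uniform per-layer shapes, as the sizes in the comments imply):

    import torch

    def pack_weights(all_weights):
        # all_weights[layer] == (w_ih, w_hh, b_ih, b_hh), as in nn.LSTM.all_weights
        w_ih, w_hh, b_ih, b_hh = zip(*all_weights)
        return [
            torch.stack(w_ih),  # (layer, 4*hiddenSize, inputSize)
            torch.stack(w_hh),  # (layer, 4*hiddenSize, hiddenSize)
            torch.stack(b_ih),  # (layer, 4*hiddenSize)
            torch.stack(b_hh),  # (layer, 4*hiddenSize)
        ]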
@@ -398,7 +398,7 @@ def lstm_inputs(
         return x, (hx, cx), lstm.all_weights, lstm
     else:
         # NB: lstm.all_weights format:
-        # wih, whh, bih, bhh = lstm.all_weights[layer]
+        # w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer]
         return x, (hx, cx), lstm.all_weights, None


@@ -406,17 +406,17 @@ def lstm_factory(cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
         inputs = input.unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = cell(inputs[seq_idx], (hy, cy), wih, whh, bih, bhh)
+            hy, cy = cell(inputs[seq_idx], (hy, cy), w_ih, w_hh, b_ih, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))

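For context, the `cell` argument these factories drive computes one standard LSTM step. A hedged sketch in the renamed w_ih/w_hh/b_ih/b_hh convention (a plausible reconstruction, not code copied from this diff):

    import torch

    def lstm_cell(x, hidden, w_ih, w_hh, b_ih, b_hh):
        hx, cx = hidden
        # input and recurrent projections for all four gates, one matmul each
        gates = torch.mm(x, w_ih.t()) + torch.mm(hx, w_hh.t()) + b_ih + b_hh
        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
        cy = torch.sigmoid(forgetgate) * cx + torch.sigmoid(ingate) * torch.tanh(cellgate)
        hy = torch.sigmoid(outgate) * torch.tanh(cy)
        return hy, cy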
@@ -432,17 +432,17 @@ def lstm_factory_premul(premul_cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
-        inputs = torch.matmul(input, wih.t()).unbind(0)
+        inputs = torch.matmul(input, w_ih.t()).unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), whh, bih, bhh)
+            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), w_hh, b_ih, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))

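A note on the design this premul factory benchmarks: the input projection against w_ih has no recurrent dependency, so the `torch.matmul(input, w_ih.t())` above hoists it out of the sequential loop and computes it for all timesteps in one batched matmul; only the w_hh projection must stay per-step. A hedged sketch of the unhoisted equivalent, for comparison:

    # equivalent but slower: one small input matmul per timestep
    for x_t in input.unbind(0):
        igate = torch.matmul(x_t, w_ih.t())
        hy, cy = premul_cell(igate, (hy, cy), w_hh, b_ih, b_hh)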
@@ -458,10 +458,10 @@ def lstm_factory_premul_bias(premul_cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
@@ -470,11 +470,11 @@ def lstm_factory_premul_bias(premul_cell, script):
         # FIXME matmul(x,y) + bias currently goes through jit AD, and backward formula in AD is not optimized for this
         # case. Workaround with mm and views.
         inpSize = input.size()
-        inputs = torch.mm(input.view(-1, inpSize[2]), wih.t()) + bih
+        inputs = torch.mm(input.view(-1, inpSize[2]), w_ih.t()) + b_ih
         inputs = inputs.view(inpSize[0], inpSize[1], -1).unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), whh, bhh)
+            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), w_hh, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))

@@ -488,12 +488,12 @@ def lstm_factory_premul_bias(premul_cell, script):
 # simple: flat inputs (no tuples), no list to accumulate outputs
 # useful mostly for benchmarking older JIT versions
 def lstm_factory_simple(cell, script):
-    def dynamic_rnn(input, hx, cx, wih, whh, bih, bhh):
+    def dynamic_rnn(input, hx, cx, w_ih, w_hh, b_ih, b_hh):
         hy = hx  # for scoping
         cy = cx  # for scoping
         inputs = input.unbind(0)
         for seq_idx in range(len(inputs)):
-            hy, cy = cell(inputs[seq_idx], hy, cy, wih, whh, bih, bhh)
+            hy, cy = cell(inputs[seq_idx], hy, cy, w_ih, w_hh, b_ih, b_hh)
         return hy, cy

     if script:
@@ -515,12 +515,12 @@ def lstm_factory_multilayer(cell, script):
             hy = hx[layer]
             cy = cx[layer]
             base_idx = layer * params_stride
-            wih = params[base_idx]
-            whh = params[base_idx + 1]
-            bih = params[base_idx + 2]
-            bhh = params[base_idx + 3]
+            w_ih = params[base_idx]
+            w_hh = params[base_idx + 1]
+            b_ih = params[base_idx + 2]
+            b_hh = params[base_idx + 3]
             for seq_idx in range(len(inputs)):
-                hy, cy = cell(inputs[seq_idx], (hy, cy), wih, whh, bih, bhh)
+                hy, cy = cell(inputs[seq_idx], (hy, cy), w_ih, w_hh, b_ih, b_hh)
                 outputs += [hy]
             inputs, outputs = outputs, []
         return torch.stack(inputs), (hy.unsqueeze(0), cy.unsqueeze(0))
@@ -51,34 +51,34 @@ def test_rnns(

     print("Setting up...")
     control = control_creator(**creator_args)
-    experim = experim_creator(**creator_args)
+    experiment = experim_creator(**creator_args)

     # Precondition
-    assertEqual(experim.inputs, control.inputs)
-    assertEqual(experim.params, control.params)
+    assertEqual(experiment.inputs, control.inputs)
+    assertEqual(experiment.params, control.params)

     print("Checking outputs...")
     control_outputs = control.forward(*control.inputs)
-    experim_outputs = experim.forward(*experim.inputs)
+    experim_outputs = experiment.forward(*experiment.inputs)
     assertEqual(experim_outputs, control_outputs)

     print("Checking grads...")
     assert control.backward_setup is not None
-    assert experim.backward_setup is not None
+    assert experiment.backward_setup is not None
     assert control.backward is not None
-    assert experim.backward is not None
+    assert experiment.backward is not None
     control_backward_inputs = control.backward_setup(control_outputs, seed)
-    experim_backward_inputs = experim.backward_setup(experim_outputs, seed)
+    experim_backward_inputs = experiment.backward_setup(experim_outputs, seed)

     control.backward(*control_backward_inputs)
-    experim.backward(*experim_backward_inputs)
+    experiment.backward(*experim_backward_inputs)

     control_grads = [p.grad for p in control.params]
-    experim_grads = [p.grad for p in experim.params]
+    experim_grads = [p.grad for p in experiment.params]
     assertEqual(experim_grads, control_grads)

     if verbose:
-        print(experim.forward.graph_for(*experim.inputs))
+        print(experiment.forward.graph_for(*experiment.inputs))
         print()


@@ -103,16 +103,16 @@ def test_vl_py(**test_args):

     print("Setting up...")
     control = control_creator(**creator_args)
-    experim = experim_creator(**creator_args)
+    experiment = experim_creator(**creator_args)

     # Precondition
-    assertEqual(experim.inputs, control.inputs[:2])
-    assertEqual(experim.params, control.params)
+    assertEqual(experiment.inputs, control.inputs[:2])
+    assertEqual(experiment.params, control.params)

     print("Checking outputs...")
     control_out, control_hiddens = control.forward(*control.inputs)
     control_hx, control_cx = control_hiddens
-    experim_out, experim_hiddens = experim.forward(*experim.inputs)
+    experim_out, experim_hiddens = experiment.forward(*experiment.inputs)
     experim_hx, experim_cx = experim_hiddens

     experim_padded = nn.utils.rnn.pad_sequence(experim_out).squeeze(-2)
@@ -122,25 +122,25 @@ def test_vl_py(**test_args):

     print("Checking grads...")
     assert control.backward_setup is not None
-    assert experim.backward_setup is not None
+    assert experiment.backward_setup is not None
     assert control.backward is not None
-    assert experim.backward is not None
+    assert experiment.backward is not None
     control_backward_inputs = control.backward_setup(
         (control_out, control_hiddens), test_args["seed"]
     )
-    experim_backward_inputs = experim.backward_setup(
+    experim_backward_inputs = experiment.backward_setup(
         (experim_out, experim_hiddens), test_args["seed"]
     )

     control.backward(*control_backward_inputs)
-    experim.backward(*experim_backward_inputs)
+    experiment.backward(*experim_backward_inputs)

     control_grads = [p.grad for p in control.params]
-    experim_grads = [p.grad for p in experim.params]
+    experim_grads = [p.grad for p in experiment.params]
     assertEqual(experim_grads, control_grads)

     if test_args["verbose"]:
-        print(experim.forward.graph_for(*experim.inputs))
+        print(experiment.forward.graph_for(*experiment.inputs))
         print()


@@ -885,7 +885,7 @@ class HungarianMatcher(nn.Module):
         self.cost_bbox = cost_bbox
         self.cost_giou = cost_giou
         assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, (
-            "all costs cant be 0"
+            "all costs can't be 0"
         )

     @torch.no_grad()
@@ -920,13 +920,13 @@ class HungarianMatcher(nn.Module):

         # Compute the classification cost. Contrary to the loss, we don't use the NLL,
         # but approximate it in 1 - proba[target class].
-        # The 1 is a constant that doesn't change the matching, it can be ommitted.
+        # The 1 is a constant that doesn't change the matching, it can be omitted.
         cost_class = -out_prob[:, tgt_ids]

         # Compute the L1 cost between boxes
         cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)

-        # Compute the giou cost betwen boxes
+        # Compute the giou cost between boxes
         cost_giou = -generalized_box_iou(
             box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
         )
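For context on this hunk: in DETR-style matchers the three cost matrices above are typically combined into one weighted cost and solved with the Hungarian algorithm. A hedged sketch, where `cost_class_w`, `cost_bbox_w`, and `cost_giou_w` stand in for the `self.cost_*` weights from the constructor hunk:

    from scipy.optimize import linear_sum_assignment

    # weighted sum of the per-(prediction, target) cost terms
    C = cost_bbox_w * cost_bbox + cost_class_w * cost_class + cost_giou_w * cost_giou
    row_ind, col_ind = linear_sum_assignment(C.cpu())  # optimal one-to-one assignment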
@@ -44,7 +44,7 @@ def device_sync(device):
     elif "cpu" in device:
         pass
     else:
-        print(f"device={device} is not yet suppported")
+        print(f"device={device} is not yet supported")


 def get_arch_name() -> str:
@@ -20,7 +20,7 @@ For now we omit data preprocessing as well as result post-processing.

 ### Running a single benchmark

-The togglable commmand line arguments to the script are as follows:
+The togglable command line arguments to the script are as follows:
 - `num_iters` (default: 100): how many requests to send to the backend
   excluding the first warmup request
 - `batch_size` (default: 32): the batch size of the requests.
@@ -45,7 +45,7 @@ class FrontendWorker(mp.Process):
         """
         This function will poll the response queue until it has received all
         responses. It records the startup latency, the average, max, min latency
-        as well as througput of requests.
+        as well as throughput of requests.
         """
         warmup_response_time = None
         response_times = []
@@ -55,7 +55,7 @@ def main(argv: list[str]) -> None:

     results = Runner(work_orders, cadence=30.0).run()

-    # TODO: Annotate with TypedDict when 3.8 is the minimum supported verson.
+    # TODO: Annotate with TypedDict when 3.8 is the minimum supported version.
     grouped_results: dict[str, dict[str, list[Union[float, int]]]] = {
         key: {"times": [], "counts": []} for key in keys
     }
@@ -2,7 +2,7 @@

 The contents of this file are placeholders, and will be replaced by more
 expressive and robust components (e.g. better runner and result display
-components) in future iterations. However this allows us to excercise the
+components) in future iterations. However this allows us to exercise the
 underlying benchmark generation infrastructure in the mean time.
 """

@@ -561,7 +561,7 @@ class BenchmarkRunner:
         output_csv_filename = self.args.output_csv
         headers = [
             "Benchmarking Framework",
-            "Benchamrking Module Name",
+            "Benchmarking Module Name",
             "Case Name",
             "tag",
             "run_backward",
@@ -134,14 +134,14 @@ def _validate(configs):
 def config_list(**configs):
     """Generate configs based on the list of input shapes.
     This function will take input shapes specified in a list from user. Besides
-    that, all other parameters will be cross producted first and each of the
+    that, all other parameters will be cross produced first and each of the
     generated list will be merged with the input shapes list.

     Reserved Args:
         attr_names(reserved): a list of names for input shapes.
         attrs(reserved): a list of values for each input shape.
         corss_product: a dictionary of attributes which will be
-        cross producted with the input shapes.
+        cross produced with the input shapes.
         tags(reserved): a tag used to filter inputs.

     Here is an example:
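A hedged sketch of the cross-product-then-merge behavior this docstring describes, with hypothetical attribute values rather than the module's real inputs:

    import itertools

    shapes = [[1, 1, 1], [64, 64, 64]]  # the user-specified input-shapes list
    cross = {"device": ["cpu", "cuda"], "dtype": ["float32"]}

    configs = []
    for shape in shapes:
        for combo in itertools.product(*cross.values()):
            cfg = dict(zip(["M", "N", "K"], shape))  # one config per input shape
            cfg.update(zip(cross.keys(), combo))     # merged with each cross-product combo
            configs.append(cfg)
    # 2 shapes x (2 devices x 1 dtype) -> 4 configs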
@@ -1,4 +1,4 @@
-Benchmarking Framework,Benchamrking Module Name,Case Name,tag,run_backward,Execution Time
+Benchmarking Framework,Benchmarking Module Name,Case Name,tag,run_backward,Execution Time
 PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,3.9497
 PyTorch,add,add_M64_N64_K64_cpu,short,FALSE,14.3181
 PyTorch,add,add_M64_N64_K128_cpu,short,FALSE,14.6826
@@ -89,7 +89,7 @@ class Benchmark:

     @staticmethod
     def default_configs():
-        """return a list of defualt configs for this benchmark"""
+        """return a list of default configs for this benchmark"""
         raise ValueError("this method should be reimplemented by subclass")

     def is_supported(self):
@@ -271,9 +271,9 @@ def run_single_backend_sdpa(
     if config.calculate_bwd_time:
         # TODO: debug backward pass for njt
         if eager_sdpa and not config.attn_type == "document_mask":
-            dOut = torch.randn_like(out_eager.transpose(1, 2)).transpose(1, 2)
+            d_out = torch.randn_like(out_eager.transpose(1, 2)).transpose(1, 2)
             backward_eager_time = benchmark_torch_function_in_microseconds(
-                out_eager.backward, dOut, retain_graph=True
+                out_eager.backward, d_out, retain_graph=True
             )
         else:
             backward_eager_time = float("nan")
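A note on the pattern in this hunk and the similar ones below: benchmarking `backward` repeatedly needs a fixed upstream gradient and `retain_graph=True`, because each call to `backward()` would otherwise free the autograd graph. A minimal hedged sketch, where `out` and `iters` are placeholders:

    import torch

    d_out = torch.randn_like(out)  # fixed upstream gradient passed to out.backward(...)
    for _ in range(iters):
        out.backward(d_out, retain_graph=True)  # keep the graph so backward can re-run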
@@ -340,9 +340,9 @@ def run_single_backend_FA(

     if config.calculate_bwd_time:
         if FA:
-            dOut = torch.randn_like(out_FA)
+            d_out = torch.randn_like(out_FA)
             backward_FA_time = benchmark_torch_function_in_microseconds(
-                out_FA.backward, dOut, retain_graph=True
+                out_FA.backward, d_out, retain_graph=True
             )
         else:
             backward_FA_time = float("nan")
@@ -432,9 +432,9 @@ def run_single_experiment(
     )

     if config.calculate_bwd_time:
-        dOut = torch.randn_like(out_compile)
+        d_out = torch.randn_like(out_compile)
         backward_compile_time = benchmark_torch_function_in_microseconds(
-            out_compile.backward, dOut, retain_graph=True
+            out_compile.backward, d_out, retain_graph=True
         )
     sparsity = block_mask.sparsity() / 100.0 if block_mask is not None else 0.0
     sparsity = sparsity if config.attn_type != "document_mask" else 0.5
@@ -172,9 +172,9 @@ def run_single_experiment(config: ExperimentConfig) -> ExperimentResults:
     out_torch = scaled_dot_product_attention(
         q, k, v, is_causal=is_causal, attn_mask=None
     )
-    dOut = torch.randn_like(out_torch)
+    d_out = torch.randn_like(out_torch)
     backward_time = benchmark_cuda_function_in_microseconds(
-        out_torch.backward, dOut, retain_graph=True
+        out_torch.backward, d_out, retain_graph=True
     )

     # Calculate TFLOPS for forward and backward passes
@@ -1,4 +1,5 @@
 coo
+fro
 hsa
 nd
 optins