[BE] fix typos in benchmarks/ (#156077)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156077
Approved by: https://github.com/Skylion007, https://github.com/malfet
ghstack dependencies: #156069
Commit: 42015db6a9 (parent: 0a0023d984)
Author: Xuehai Pan
Date: 2025-06-17 09:54:39 +08:00
Committed by: PyTorch MergeBot
22 changed files with 97 additions and 97 deletions

View File

@@ -1159,7 +1159,6 @@ exclude_patterns = [
     # These files are all grandfathered in, feel free to remove from this list
     # as necessary
     'aten/**',
-    'benchmarks/**',
     'c10/**',
     'cmake/**',
     'docs/**',

View File

@@ -6,7 +6,7 @@ import sys
 # Note - hf and timm have their own version of this, torchbench does not
-# TOOD(voz): Someday, consolidate all the files into one runner instead of a shim like this...
+# TODO(voz): Someday, consolidate all the files into one runner instead of a shim like this...
 def model_names(filename: str) -> set[str]:
     names = set()
     with open(filename) as fh:

View File

@@ -1,5 +1,5 @@
 """
-Update commited CSV files used as reference points by dynamo/inductor CI.
+Update committed CSV files used as reference points by dynamo/inductor CI.
 Currently only cares about graph breaks, so only saves those columns.

View File

@@ -67,7 +67,7 @@ try:
     import torch_xla
     import torch_xla.core.xla_model as xm
-    # This is to woraround the backward issue https://github.com/pytorch/xla/issues/4174
+    # This is to workaround the backward issue https://github.com/pytorch/xla/issues/4174
     torch_xla._XLAC._init_computation_client()
 except ImportError:
     # ignore the error if torch_xla is not installed
@@ -270,7 +270,7 @@ DO_NOT_CAST_INPUTS = {"stable_diffusion"}
 # Maps a benchmark model name to a list of status codes. For any listed entry, we'll
-# capture TORCH_COMPILE_DEBUG logs in CI runs and preseve them (i.e., for upload) if
+# capture TORCH_COMPILE_DEBUG logs in CI runs and preserve them (i.e., for upload) if
 # the result status matches one listed.
 CI_PRESERVE_COMPILE_DEBUG = {
     # For example:
@@ -1074,7 +1074,7 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
     times = args.iterations_per_run
-    # Use higher tolerance for XLA since XLA cause numerical unstability when
+    # Use higher tolerance for XLA since XLA cause numerical instability when
     # graph size changes
     tolerance = args.xla_tolerance if args.trace_on_xla else 1e-4
     torch._dynamo.config.repro_tolerance = tolerance
@@ -1680,7 +1680,7 @@ class BenchmarkRunner:
         devices = [current_device] if current_device else self.args.devices
         if self.args.amp:
-            # AMP training can lead to small loss values which can undeflow
+            # AMP training can lead to small loss values which can underflow
             # gradient values returning in zero gradients. To solve this
             # problem, PyTorch introduces GradScaler. GradScaler is a stateful
             # structure, that scales the loss values to prevent underflow. Loss
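For context, the GradScaler pattern this comment describes looks roughly like the following standalone sketch (the model, data, and hyperparameters are illustrative, not taken from the runner; a CUDA device is assumed):

import torch

model = torch.nn.Linear(8, 8).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.amp.GradScaler("cuda")  # stateful: tracks a running scale factor

for _ in range(3):
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = model(torch.randn(4, 8, device="cuda")).sum()
    scaler.scale(loss).backward()  # backward on the scaled loss to avoid underflow
    scaler.step(optimizer)         # unscales grads; skips the step on inf/NaN
    scaler.update()                # adapts the scale factor for the next iteration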
@@ -1718,7 +1718,7 @@ class BenchmarkRunner:
             self.optimizer = torch.optim.SGD(params, lr=0.01, foreach=True)
             # Disable multi_tensor_sgd for benchmarking, there isn't a large performance benefit (~1%) to compiling
             # this optimizer because it is a single foreach add, and increases compile time.
-            # After autotuning and fake tensor caching lands, we can enable, becuase the compile time impact will be lower.
+            # After autotuning and fake tensor caching lands, we can enable, because the compile time impact will be lower.
             # Fake Tensor caching: https://github.com/pytorch/pytorch/pull/113873
             # Autotuning: https://github.com/pytorch/pytorch/issues/117447
             self.optimizer.step = torch._dynamo.disable(self.optimizer.step)
@@ -2823,7 +2823,7 @@ class BenchmarkRunner:
             )
             # NB: Don't upload them to the benchmark database as they are debugging
-            # infomation. There are also around a million records a day which is
+            # information. There are also around a million records a day which is
             # wasteful to store
             write_outputs(
                 filename,
@@ -2881,7 +2881,7 @@ def parse_args(args=None):
     iterations_per_run_help = """
 Run this may iterations for each time measurement. This is mainly used for
 XLA training. We want to run multiple iterations per measurement so the
-tracing and computation for different iteartions can overlap with each
+tracing and computation for different iterations can overlap with each
 other. This makes sure we have an accurate xla baseline.
 """
     parser.add_argument(
@@ -3040,7 +3040,7 @@ def parse_args(args=None):
     parser.add_argument(
         "--generate-aot-autograd-stats",
         action="store_true",
-        help="Generates AOT Autograd stats like how mnay graphs are sent to AOT",
+        help="Generates AOT Autograd stats like how many graphs are sent to AOT",
     )
     parser.add_argument(
         "--inductor-settings",
@@ -3261,7 +3261,7 @@ def parse_args(args=None):
         "--warm-start-latency",
         "--warm_start_latency",
         action="store_true",
-        help="Run model(s) twice and preseve caches in between to enable a 'warm start' on the 2nd run",
+        help="Run model(s) twice and preserve caches in between to enable a 'warm start' on the 2nd run",
     )
     group_fuser = parser.add_mutually_exclusive_group()
@@ -3610,7 +3610,7 @@ def run(runner, args, original_dir=None):
         torch.backends.mkldnn.deterministic = True
-        # Remove randomeness when torch manual seed is called
+        # Remove randomness when torch manual seed is called
         patch_torch_manual_seed()
         # Some models e.g. yolov3 assert batch size on n_gpus

View File

@@ -274,7 +274,7 @@ class OperatorInputsLoader:
             yield
             return
-        # line[1] represents number of times these inputs occured, ignored for now
+        # line[1] represents number of times these inputs occurred, ignored for now
         for line in self.operator_db[str(operator)].items():
             inps = line[0]

View File

@@ -269,7 +269,7 @@ def parse_args():
         "--no-graphs",
         action="store_true",
         default=False,
-        help="Do not genenerate and upload metric graphs",
+        help="Do not generate and upload metric graphs",
     )
     parser.add_argument(
         "--no-update-archive",
@@ -368,7 +368,7 @@ def get_mode(args):
 def get_skip_tests(suite, device, is_training: bool):
     """
-    Generate -x seperated string to skip the unusual setup training tests
+    Generate -x separated string to skip the unusual setup training tests
     """
     skip_tests = set()
     original_dir = abspath(os.getcwd())
@@ -1359,7 +1359,7 @@ class DashboardUpdater:
         dtype = self.args.dtypes[0]
         day, _ = archive_data(self.args.archive_name)
         target_dir = get_archive_name(self.args, dtype)
-        # Update lookup csv the folder to arhived logs
+        # Update lookup csv the folder to archived logs
         subprocess.check_call(
             f'echo "{day},performance,{dtype},{target_dir}" >> {self.lookup_file}',
             shell=True,
@@ -1418,7 +1418,7 @@ class DashboardUpdater:
     def comment_on_gh(self, comment):
         """
-        Send a commment to dashboard
+        Send a comment to dashboard
         """
         with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
             f.write(comment)

View File

@@ -229,7 +229,7 @@ skip:
   - doctr_det_predictor
   - doctr_reco_predictor
   - moondream
-  # doesnt fit in memory
+  # doesn't fit in memory
   - phi_1_5
   - detectron2_fcos_r_50_fpn

View File

@@ -225,7 +225,7 @@ def varlen_lstm_inputs(
         return x, lengths, (hx, cx), lstm.all_weights, lstm
     else:
         # NB: lstm.all_weights format:
-        #     wih, whh, bih, bhh = lstm.all_weights[layer]
+        #     w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer]
         return x, lengths, (hx, cx), lstm.all_weights, None
@@ -266,10 +266,10 @@ def varlen_lstm_factory(cell, script):
     def dynamic_rnn(
         sequences: list[Tensor],
         hiddens: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[list[Tensor], tuple[list[Tensor], list[Tensor]]]:
         hx, cx = hiddens
         hxs = hx.unbind(1)
@@ -286,7 +286,7 @@ def varlen_lstm_factory(cell, script):
             for seq_idx in range(len(inputs)):
                 hy, cy = cell(
-                    inputs[seq_idx].unsqueeze(0), (hy, cy), wih, whh, bih, bhh
+                    inputs[seq_idx].unsqueeze(0), (hy, cy), w_ih, w_hh, b_ih, b_hh
                 )
                 output += [hy]
             outputs += [torch.stack(output)]
@@ -315,7 +315,7 @@ def varlen_lstm_creator(script=False, **kwargs):
 # cudnn_layernorm_lstm: since cudnn does not have Layernorm LSTM, we cannot benchmark
-# the lowerbound directly. Instead, we only benchmark the forward pass by mimicing the
+# the lowerbound directly. Instead, we only benchmark the forward pass by mimicking the
 # computation of a cudnn lstm + seq_len * 3 layernorm computation. This should serve
 # as a perf lowerbound for the Layernorm LSTM forward pass(given that Layernorm itself
 # is invariant), the lowerbound of backward pass is hard to get since we lose the
@@ -352,12 +352,12 @@ def layernorm_pytorch_lstm_creator(**kwargs):
     )
-# input: lstm.all_weights format (wih, whh, bih, bhh = lstm.all_weights[layer])
+# input: lstm.all_weights format (w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer])
 # output: packed_weights with format
-# packed_weights[0] is wih with size (layer, 4*hiddenSize, inputSize)
-# packed_weights[1] is whh with size (layer, 4*hiddenSize, hiddenSize)
-# packed_weights[2] is bih with size (layer, 4*hiddenSize)
-# packed_weights[3] is bhh with size (layer, 4*hiddenSize)
+# packed_weights[0] is w_ih with size (layer, 4*hiddenSize, inputSize)
+# packed_weights[1] is w_hh with size (layer, 4*hiddenSize, hiddenSize)
+# packed_weights[2] is b_ih with size (layer, 4*hiddenSize)
+# packed_weights[3] is b_hh with size (layer, 4*hiddenSize)
 def stack_weights(weights):
     def unzip_columns(mat):
         assert isinstance(mat, list)
@@ -398,7 +398,7 @@ def lstm_inputs(
         return x, (hx, cx), lstm.all_weights, lstm
     else:
         # NB: lstm.all_weights format:
-        #     wih, whh, bih, bhh = lstm.all_weights[layer]
+        #     w_ih, w_hh, b_ih, b_hh = lstm.all_weights[layer]
         return x, (hx, cx), lstm.all_weights, None
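For reference, a minimal sketch (not part of the diff) of the `all_weights` layout these comments rely on; the sizes follow the documented `torch.nn.LSTM` parameter shapes:

import torch.nn as nn

lstm = nn.LSTM(input_size=8, hidden_size=16, num_layers=2)
for layer, (w_ih, w_hh, b_ih, b_hh) in enumerate(lstm.all_weights):
    in_features = 8 if layer == 0 else 16          # layer 0 sees the raw input
    assert w_ih.shape == (4 * 16, in_features)     # 4 gates stacked: i, f, g, o
    assert w_hh.shape == (4 * 16, 16)
    assert b_ih.shape == b_hh.shape == (4 * 16,)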
@@ -406,17 +406,17 @@ def lstm_factory(cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
         inputs = input.unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = cell(inputs[seq_idx], (hy, cy), wih, whh, bih, bhh)
+            hy, cy = cell(inputs[seq_idx], (hy, cy), w_ih, w_hh, b_ih, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))
@@ -432,17 +432,17 @@ def lstm_factory_premul(premul_cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
-        inputs = torch.matmul(input, wih.t()).unbind(0)
+        inputs = torch.matmul(input, w_ih.t()).unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), whh, bih, bhh)
+            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), w_hh, b_ih, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))
@@ -458,10 +458,10 @@ def lstm_factory_premul_bias(premul_cell, script):
     def dynamic_rnn(
         input: Tensor,
         hidden: tuple[Tensor, Tensor],
-        wih: Tensor,
-        whh: Tensor,
-        bih: Tensor,
-        bhh: Tensor,
+        w_ih: Tensor,
+        w_hh: Tensor,
+        b_ih: Tensor,
+        b_hh: Tensor,
     ) -> tuple[Tensor, tuple[Tensor, Tensor]]:
         hx, cx = hidden
         outputs = []
@@ -470,11 +470,11 @@ def lstm_factory_premul_bias(premul_cell, script):
         # FIXME matmul(x,y) + bias currently goes through jit AD, and backward formula in AD is not optimized for this
         # case. Workaround with mm and views.
         inpSize = input.size()
-        inputs = torch.mm(input.view(-1, inpSize[2]), wih.t()) + bih
+        inputs = torch.mm(input.view(-1, inpSize[2]), w_ih.t()) + b_ih
         inputs = inputs.view(inpSize[0], inpSize[1], -1).unbind(0)
         hy, cy = hx[0], cx[0]
         for seq_idx in range(len(inputs)):
-            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), whh, bhh)
+            hy, cy = premul_cell(inputs[seq_idx], (hy, cy), w_hh, b_hh)
             outputs += [hy]
         return torch.stack(outputs), (hy.unsqueeze(0), cy.unsqueeze(0))
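To illustrate the mm-and-views workaround in the hunk above: both forms compute the same batched affine projection of the input, one via a 3-D matmul and one via a flattened 2-D mm (the shapes below are invented for the sketch):

import torch

x = torch.randn(5, 3, 8)   # (seq_len, batch, input_size)
w_ih = torch.randn(32, 8)  # (4 * hidden_size, input_size)
b_ih = torch.randn(32)

y_matmul = torch.matmul(x, w_ih.t()) + b_ih                        # direct form
y_mm = (torch.mm(x.view(-1, 8), w_ih.t()) + b_ih).view(5, 3, -1)   # workaround
assert torch.allclose(y_matmul, y_mm, atol=1e-6)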
@@ -488,12 +488,12 @@ def lstm_factory_premul_bias(premul_cell, script):
 # simple: flat inputs (no tuples), no list to accumulate outputs
 # useful mostly for benchmarking older JIT versions
 def lstm_factory_simple(cell, script):
-    def dynamic_rnn(input, hx, cx, wih, whh, bih, bhh):
+    def dynamic_rnn(input, hx, cx, w_ih, w_hh, b_ih, b_hh):
         hy = hx  # for scoping
         cy = cx  # for scoping
         inputs = input.unbind(0)
         for seq_idx in range(len(inputs)):
-            hy, cy = cell(inputs[seq_idx], hy, cy, wih, whh, bih, bhh)
+            hy, cy = cell(inputs[seq_idx], hy, cy, w_ih, w_hh, b_ih, b_hh)
         return hy, cy

     if script:
@@ -515,12 +515,12 @@ def lstm_factory_multilayer(cell, script):
             hy = hx[layer]
             cy = cx[layer]
             base_idx = layer * params_stride
-            wih = params[base_idx]
-            whh = params[base_idx + 1]
-            bih = params[base_idx + 2]
-            bhh = params[base_idx + 3]
+            w_ih = params[base_idx]
+            w_hh = params[base_idx + 1]
+            b_ih = params[base_idx + 2]
+            b_hh = params[base_idx + 3]
             for seq_idx in range(len(inputs)):
-                hy, cy = cell(inputs[seq_idx], (hy, cy), wih, whh, bih, bhh)
+                hy, cy = cell(inputs[seq_idx], (hy, cy), w_ih, w_hh, b_ih, b_hh)
                 outputs += [hy]
             inputs, outputs = outputs, []
         return torch.stack(inputs), (hy.unsqueeze(0), cy.unsqueeze(0))

View File

@@ -51,34 +51,34 @@ def test_rnns(
     print("Setting up...")
     control = control_creator(**creator_args)
-    experim = experim_creator(**creator_args)
+    experiment = experim_creator(**creator_args)

     # Precondition
-    assertEqual(experim.inputs, control.inputs)
-    assertEqual(experim.params, control.params)
+    assertEqual(experiment.inputs, control.inputs)
+    assertEqual(experiment.params, control.params)

     print("Checking outputs...")
     control_outputs = control.forward(*control.inputs)
-    experim_outputs = experim.forward(*experim.inputs)
+    experim_outputs = experiment.forward(*experiment.inputs)
     assertEqual(experim_outputs, control_outputs)

     print("Checking grads...")
     assert control.backward_setup is not None
-    assert experim.backward_setup is not None
+    assert experiment.backward_setup is not None
     assert control.backward is not None
-    assert experim.backward is not None
+    assert experiment.backward is not None
     control_backward_inputs = control.backward_setup(control_outputs, seed)
-    experim_backward_inputs = experim.backward_setup(experim_outputs, seed)
+    experim_backward_inputs = experiment.backward_setup(experim_outputs, seed)
     control.backward(*control_backward_inputs)
-    experim.backward(*experim_backward_inputs)
+    experiment.backward(*experim_backward_inputs)
     control_grads = [p.grad for p in control.params]
-    experim_grads = [p.grad for p in experim.params]
+    experim_grads = [p.grad for p in experiment.params]
     assertEqual(experim_grads, control_grads)

     if verbose:
-        print(experim.forward.graph_for(*experim.inputs))
+        print(experiment.forward.graph_for(*experiment.inputs))
     print()
@@ -103,16 +103,16 @@ def test_vl_py(**test_args):
     print("Setting up...")
     control = control_creator(**creator_args)
-    experim = experim_creator(**creator_args)
+    experiment = experim_creator(**creator_args)

     # Precondition
-    assertEqual(experim.inputs, control.inputs[:2])
-    assertEqual(experim.params, control.params)
+    assertEqual(experiment.inputs, control.inputs[:2])
+    assertEqual(experiment.params, control.params)

     print("Checking outputs...")
     control_out, control_hiddens = control.forward(*control.inputs)
     control_hx, control_cx = control_hiddens
-    experim_out, experim_hiddens = experim.forward(*experim.inputs)
+    experim_out, experim_hiddens = experiment.forward(*experiment.inputs)
     experim_hx, experim_cx = experim_hiddens

     experim_padded = nn.utils.rnn.pad_sequence(experim_out).squeeze(-2)
@@ -122,25 +122,25 @@ def test_vl_py(**test_args):
     print("Checking grads...")
     assert control.backward_setup is not None
-    assert experim.backward_setup is not None
+    assert experiment.backward_setup is not None
     assert control.backward is not None
-    assert experim.backward is not None
+    assert experiment.backward is not None
     control_backward_inputs = control.backward_setup(
         (control_out, control_hiddens), test_args["seed"]
     )
-    experim_backward_inputs = experim.backward_setup(
+    experim_backward_inputs = experiment.backward_setup(
         (experim_out, experim_hiddens), test_args["seed"]
     )
     control.backward(*control_backward_inputs)
-    experim.backward(*experim_backward_inputs)
+    experiment.backward(*experim_backward_inputs)
     control_grads = [p.grad for p in control.params]
-    experim_grads = [p.grad for p in experim.params]
+    experim_grads = [p.grad for p in experiment.params]
     assertEqual(experim_grads, control_grads)

     if test_args["verbose"]:
-        print(experim.forward.graph_for(*experim.inputs))
+        print(experiment.forward.graph_for(*experiment.inputs))
     print()

View File

@@ -885,7 +885,7 @@ class HungarianMatcher(nn.Module):
         self.cost_bbox = cost_bbox
         self.cost_giou = cost_giou
         assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, (
-            "all costs cant be 0"
+            "all costs can't be 0"
         )

     @torch.no_grad()
@@ -920,13 +920,13 @@ class HungarianMatcher(nn.Module):
         # Compute the classification cost. Contrary to the loss, we don't use the NLL,
         # but approximate it in 1 - proba[target class].
-        # The 1 is a constant that doesn't change the matching, it can be ommitted.
+        # The 1 is a constant that doesn't change the matching, it can be omitted.
         cost_class = -out_prob[:, tgt_ids]

         # Compute the L1 cost between boxes
         cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)

-        # Compute the giou cost betwen boxes
+        # Compute the giou cost between boxes
         cost_giou = -generalized_box_iou(
             box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
         )
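As a quick aside, the pairwise L1 cost term above can be sketched standalone (the tensors and sizes here are invented for illustration; the full matcher also adds the class and gIoU terms before running the Hungarian assignment):

import torch

out_bbox = torch.rand(10, 4)  # 10 predicted boxes in (cx, cy, w, h)
tgt_bbox = torch.rand(3, 4)   # 3 ground-truth boxes
cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)  # (10, 3) pairwise L1 costs
assert cost_bbox.shape == (10, 3)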

View File

@@ -44,7 +44,7 @@ def device_sync(device):
     elif "cpu" in device:
         pass
     else:
-        print(f"device={device} is not yet suppported")
+        print(f"device={device} is not yet supported")

 def get_arch_name() -> str:

View File

@@ -20,7 +20,7 @@ For now we omit data preprocessing as well as result post-processing.
 ### Running a single benchmark
-The togglable commmand line arguments to the script are as follows:
+The togglable command line arguments to the script are as follows:
 - `num_iters` (default: 100): how many requests to send to the backend
   excluding the first warmup request
 - `batch_size` (default: 32): the batch size of the requests.

View File

@@ -45,7 +45,7 @@ class FrontendWorker(mp.Process):
         """
         This function will poll the response queue until it has received all
         responses. It records the startup latency, the average, max, min latency
-        as well as througput of requests.
+        as well as throughput of requests.
         """
         warmup_response_time = None
         response_times = []

View File

@@ -55,7 +55,7 @@ def main(argv: list[str]) -> None:
     results = Runner(work_orders, cadence=30.0).run()
-    # TODO: Annotate with TypedDict when 3.8 is the minimum supported verson.
+    # TODO: Annotate with TypedDict when 3.8 is the minimum supported version.
     grouped_results: dict[str, dict[str, list[Union[float, int]]]] = {
         key: {"times": [], "counts": []} for key in keys
     }

View File

@@ -2,7 +2,7 @@
 The contents of this file are placeholders, and will be replaced by more
 expressive and robust components (e.g. better runner and result display
-components) in future iterations. However this allows us to excercise the
+components) in future iterations. However this allows us to exercise the
 underlying benchmark generation infrastructure in the mean time.
 """

View File

@@ -561,7 +561,7 @@ class BenchmarkRunner:
         output_csv_filename = self.args.output_csv
         headers = [
             "Benchmarking Framework",
-            "Benchamrking Module Name",
+            "Benchmarking Module Name",
             "Case Name",
             "tag",
             "run_backward",

View File

@@ -134,14 +134,14 @@ def _validate(configs):
 def config_list(**configs):
     """Generate configs based on the list of input shapes.
     This function will take input shapes specified in a list from user. Besides
-    that, all other parameters will be cross producted first and each of the
+    that, all other parameters will be cross produced first and each of the
     generated list will be merged with the input shapes list.
     Reserved Args:
         attr_names(reserved): a list of names for input shapes.
         attrs(reserved): a list of values for each input shape.
         corss_product: a dictionary of attributes which will be
-            cross producted with the input shapes.
+            cross produced with the input shapes.
         tags(reserved): a tag used to filter inputs.

     Here is an example:

View File

@@ -1,4 +1,4 @@
-Benchmarking Framework,Benchamrking Module Name,Case Name,tag,run_backward,Execution Time
+Benchmarking Framework,Benchmarking Module Name,Case Name,tag,run_backward,Execution Time
 PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,3.9497
 PyTorch,add,add_M64_N64_K64_cpu,short,FALSE,14.3181
 PyTorch,add,add_M64_N64_K128_cpu,short,FALSE,14.6826


View File

@@ -89,7 +89,7 @@ class Benchmark:
     @staticmethod
     def default_configs():
-        """return a list of defualt configs for this benchmark"""
+        """return a list of default configs for this benchmark"""
        raise ValueError("this method should be reimplemented by subclass")

     def is_supported(self):

View File

@@ -271,9 +271,9 @@ def run_single_backend_sdpa(
     if config.calculate_bwd_time:
         # TODO: debug backward pass for njt
         if eager_sdpa and not config.attn_type == "document_mask":
-            dOut = torch.randn_like(out_eager.transpose(1, 2)).transpose(1, 2)
+            d_out = torch.randn_like(out_eager.transpose(1, 2)).transpose(1, 2)
             backward_eager_time = benchmark_torch_function_in_microseconds(
-                out_eager.backward, dOut, retain_graph=True
+                out_eager.backward, d_out, retain_graph=True
             )
         else:
             backward_eager_time = float("nan")
@@ -340,9 +340,9 @@ def run_single_backend_FA(
     if config.calculate_bwd_time:
         if FA:
-            dOut = torch.randn_like(out_FA)
+            d_out = torch.randn_like(out_FA)
             backward_FA_time = benchmark_torch_function_in_microseconds(
-                out_FA.backward, dOut, retain_graph=True
+                out_FA.backward, d_out, retain_graph=True
             )
         else:
             backward_FA_time = float("nan")
@@ -432,9 +432,9 @@ def run_single_experiment(
     )
     if config.calculate_bwd_time:
-        dOut = torch.randn_like(out_compile)
+        d_out = torch.randn_like(out_compile)
         backward_compile_time = benchmark_torch_function_in_microseconds(
-            out_compile.backward, dOut, retain_graph=True
+            out_compile.backward, d_out, retain_graph=True
         )
     sparsity = block_mask.sparsity() / 100.0 if block_mask is not None else 0.0
     sparsity = sparsity if config.attn_type != "document_mask" else 0.5

View File

@@ -172,9 +172,9 @@ def run_single_experiment(config: ExperimentConfig) -> ExperimentResults:
         out_torch = scaled_dot_product_attention(
             q, k, v, is_causal=is_causal, attn_mask=None
         )
-        dOut = torch.randn_like(out_torch)
+        d_out = torch.randn_like(out_torch)
         backward_time = benchmark_cuda_function_in_microseconds(
-            out_torch.backward, dOut, retain_graph=True
+            out_torch.backward, d_out, retain_graph=True
         )

     # Calculate TFLOPS for forward and backward passes
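The backward-only timing pattern in these hunks can be sketched with the public torch.utils.benchmark.Timer instead of the repo's internal helpers (the tensors and sizes below are illustrative):

import torch
from torch.utils.benchmark import Timer

x = torch.randn(64, 64, requires_grad=True)
out = x @ x                     # non-scalar output, so backward needs a grad
d_out = torch.randn_like(out)

timer = Timer(
    stmt="out.backward(d_out, retain_graph=True)",  # retain_graph lets the
    globals={"out": out, "d_out": d_out},           # measurement repeat
)
print(timer.timeit(100))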

View File

@@ -1,4 +1,5 @@
 coo
+fro
 hsa
 nd
 optins