mirror of https://github.com/pytorch/pytorch.git, synced 2025-11-04 08:00:58 +08:00
[BE][4/16] fix typos in torch/ (torch/_dynamo/) (#156314)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156314
Approved by: https://github.com/jingsh
ghstack dependencies: #156313
committed by PyTorch MergeBot
parent 6ff6630375
commit 1b2146fc6d

@@ -1172,7 +1172,6 @@ exclude_patterns = [
     'test/distributed/**',
     'torch/**',
     'torch/_*/**',
-    'torch/_dynamo/**',
     'torch/ao/**',
     'torch/fx/**',
     'torch/distributed/**',
@@ -957,7 +957,7 @@ Graph break: skip: from user code at:
 Data-dependent assertion failed (cannot compile partial graph)
   Explanation: Dynamo has determined when encountering a data-dependent assert failure that it should not compile the partial graph.
   Hint: This graph break is fundamental - it is unlikely that Dynamo will ever be able to trace through your code. Consider finding a workaround.
-  Hint: Use `torch._assert()` to raise a hard AssertionError when the check fails. This error will propagate back the user code that called the compiled function (i.e. Dynamo wil not trace any exception handling).
+  Hint: Use `torch._assert()` to raise a hard AssertionError when the check fails. This error will propagate back the user code that called the compiled function (i.e. Dynamo will not trace any exception handling).
   Hint: Remove the assert statement.
   Hint: Move the assert statement outside of any context managers in order to graph break with partial graph compilation (if fullgraph=False).
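The `torch._assert()` hint above can be made concrete with a small sketch (not part of this commit; the function and tensor values are invented for illustration). Replacing a data-dependent Python assert with `torch._assert()` turns a failed check into a hard AssertionError instead of the graph break described in this message:

    import torch

    def sqrt_of_non_negative(x: torch.Tensor) -> torch.Tensor:
        # A plain `assert (x >= 0).all()` is a data-dependent assertion and
        # triggers the graph break above; torch._assert raises a hard
        # AssertionError when the condition is false, and Dynamo does not
        # trace the surrounding exception handling.
        torch._assert((x >= 0).all(), "expected a non-negative tensor")
        return x.sqrt()

    compiled = torch.compile(sqrt_of_non_negative)
    print(compiled(torch.tensor([1.0, 4.0, 9.0])))
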
@@ -50,7 +50,7 @@ __all__ = ["trace_wrapped"]


 if not torch._running_with_deploy():
-    # torch.library.custom_op does not work with torch.deploy/multipy
+    # torch.library.custom_op does not work with torch.deploy/multipy  # codespell:ignore

     @torch.library.custom_op("flex_lib::zeros_and_scatter", mutates_args=())  # type: ignore[misc]
     def zeros_and_scatter(
@@ -56,7 +56,7 @@ def make_eager_backend_with_torch_function_mode(mode):


 def make_eager_backend_with_torch_function_modes(modes):
-    """Used to trace HOPs (cond and while) for eager exectution, the metadata
+    """Used to trace HOPs (cond and while) for eager execution, the metadata
     TF mode mutates vars outside of the scope of the HOP, and we can't have graph breaks
     in the HOP, so we need to externally run this mode and not trace it."""
     from contextlib import ExitStack
@@ -1649,7 +1649,7 @@ def bytecode_from_template(fn, varname_map=None, noreturn=True, noprefix=True):
             # replace returns with jumps
             for inst in returns:
                 # don't replace inst with new instruction
-                # due to targetting/exn table/etc.
+                # due to targeting/exn table/etc.
                 jump_inst = create_jump_absolute(insts[-1])
                 inst.opname = jump_inst.opname
                 inst.opcode = jump_inst.opcode
@@ -86,7 +86,7 @@ class CacheSizeRelevantForFrame:
     num_cache_entries_with_same_id_matched_objs: int = 0

     def will_compilation_exceed(self, limit: int) -> bool:
-        # Checks if a compilation will exceed the given limit (thats why >=).
+        # Checks if a compilation will exceed the given limit (that's why >=).
         return (
             self.will_compilation_exceed_accumulated_limit()
             or self.will_compilation_exceed_specific_limit(limit)
@@ -253,7 +253,7 @@ class PyCodegen:
             # above, export _wants to_ obtain an identity FX graph (despite it
             # appears unnecessarily expensive for `torch.compile`), so we have
             # the following option to override Dynamo's preference for codegen
-            # from source. Morever, this option applies recursively, for cases
+            # from source. Moreover, this option applies recursively, for cases
             # like input tensor being returned in a new dictionary.
             #
             # And why the `ValueMutationExisting` check? Not sure, so leaving it
@@ -397,7 +397,7 @@ use_numpy_random_stream = False
 # Use C++ guard manager (deprecated: always true)
 enable_cpp_guard_manager = True

-# Use C++ guard manger for symbolic shapes
+# Use C++ guard manager for symbolic shapes
 enable_cpp_symbolic_shape_guards = False

 # Enable tracing through contextlib.contextmanager
@@ -418,7 +418,7 @@ inline_inbuilt_nn_modules = Config(  # type: ignore[var-annotated]

 # Install "free" tensor variables (globals, non-locals, nn module attributes)
 # as graph attributes.  This is useful for export, as it
-# produces a consitent number of inputs to the graph.
+# produces a consistent number of inputs to the graph.
 install_free_tensors = False

 # Use C++ FrameLocalsMapping (raw array view of Python frame fastlocals) (deprecated: always True)
@@ -493,14 +493,14 @@ only_allow_pt2_compliant_ops = False
 # This flag is ignored and maintained for backwards compatibility.
 capture_autograd_function = True

-# This flag is ignored and maintained for backwards compatbility.
+# This flag is ignored and maintained for backwards compatibility.
 capture_func_transforms = True

 # If to log Dynamo compilation metrics into log files (for OSS) and Scuba tables (for fbcode).
 log_compilation_metrics = True

 # A set of logging functions which will be reordered to the end of graph breaks,
-# allowing dynamo to construct larget graph. Note that there are some
+# allowing dynamo to construct large graph. Note that there are some
 # limitations to this, such as how it does not correctly print objects that were
 # mutated after the print statement.
 reorderable_logging_functions: set[Callable[[Any], None]] = set()
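A rough usage sketch for the `reorderable_logging_functions` setting shown above (not part of this commit; the compiled function is invented for illustration). Registering `print` lets Dynamo defer the call to the end of the captured region instead of splitting the graph at the call site:

    import torch
    import torch._dynamo.config as dynamo_config

    # Let Dynamo reorder `print` calls, subject to the limitation noted above
    # about objects that are mutated after the print statement.
    dynamo_config.reorderable_logging_functions.add(print)

    @torch.compile
    def fn(x):
        y = x + 1
        print("computed y")  # deferred rather than forcing a graph break here
        return y * 2

    fn(torch.randn(4))
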
@@ -707,7 +707,7 @@ def mark_static(t, index=None):

     if not isinstance(t, torch.Tensor):
         raise TypeError(
-            f"mark_static expects a tensor/nn.Module class but recieved {type(t)}"
+            f"mark_static expects a tensor/nn.Module class but received {type(t)}"
         )

     if isinstance(index, int):
@@ -733,7 +733,7 @@ def mark_static_address(t, guard=True):
     Tensors marked in this way will be kept alive until `torch._dynamo.reset()` is called.
     """
     if not isinstance(t, torch.Tensor):
-        raise TypeError(f"mark_static_address expects a tensor but recieved {type(t)}")
+        raise TypeError(f"mark_static_address expects a tensor but received {type(t)}")

     if guard:
         t._dynamo_static_input_type = "guarded"  # type: ignore[attr-defined]
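A minimal usage sketch for the two helpers touched above (not part of this commit; the tensor names and shapes are invented). Passing anything other than a tensor is what raises the corrected TypeError messages:

    import torch
    from torch._dynamo import mark_static, mark_static_address

    weights = torch.randn(8, 8)
    inp = torch.randn(2, 8)

    # Ask Dynamo to treat dimension 0 of `inp` as static rather than dynamic.
    mark_static(inp, 0)

    # Mark `weights` as having a stable storage address across calls; as the
    # docstring above notes, it is kept alive until torch._dynamo.reset().
    mark_static_address(weights)

    @torch.compile
    def fn(x):
        return x @ weights.t()

    fn(inp)
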
@@ -854,7 +854,7 @@ def patch_dynamo_config(

     See _allowed_config_patches for the list of allowed config patches.

-    Arguments are the same as with torch._dynamo.confing.patch.
+    Arguments are the same as with torch._dynamo.config.patch.

     Can be used as a decorator or a context manager.
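The docstring above refers to `torch._dynamo.config.patch`; a short sketch of both usages (the specific flags patched here, `verbose` and `suppress_errors`, are only examples and are not taken from this commit):

    import torch
    import torch._dynamo

    # As a context manager: the override applies only inside the block.
    with torch._dynamo.config.patch(verbose=True):
        torch.compile(lambda x: x + 1)(torch.randn(3))

    # As a decorator: the override applies while the decorated function runs.
    @torch._dynamo.config.patch(suppress_errors=True)
    def run(x):
        return torch.compile(lambda t: t * 2)(x)

    run(torch.randn(3))
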
@@ -692,7 +692,7 @@ class _TorchDynamoContext:
                 # something onto the DynamicLayerStack then we pop it off (the
                 # constructed graph code isn't guarded with try/finally).
                 #
-                # This used to be a context but putting a `with` here is a noticible
+                # This used to be a context but putting a `with` here is a noticeable
                 # perf regression (#126293)
                 saved_dynamic_layer_stack_depth = (
                     torch._C._functorch.get_dynamic_layer_stack_depth()
@@ -1209,7 +1209,7 @@ class FlattenInputOutputSignature(torch.fx.Transformer):
             if i in matched_input_elements_to_fake:
                 arg.node.meta["val"] = matched_input_elements_to_fake[i]
             else:
-                # Fill node.mata["val"] with faketensor from the input,
+                # Fill node.meta["val"] with faketensor from the input,
                 # if it's not found in matched_input_elements_positions
                 if fake_mode is not None and isinstance(flat_args[i], torch.Tensor):
                     # TODO(zhxchen17) Also preserve all the user constraints here.
@@ -1852,7 +1852,7 @@ def export(
                 "Failed to produce a graph during tracing as no tensor operations were found and same_signature is False."
             )
             # If the module does not contain any tensor computation, we would create a graph with inputs and outputs.
-            # To be consitant with the graph traced by dynano, `graph` will have only tensor inputs as placeholders
+            # To be consistent with the graph traced by dynano, `graph` will have only tensor inputs as placeholders
             # and tensor outputs as output nodes. non-tensor inputs and outputs will be added when rewriting signature.
             # We will also construct the `example_inputs`, `graph_captured_input`, and `graph_captured_result` corresponding
             # to `graph`.
@@ -432,7 +432,7 @@ def fully_expand_region_group(

         if add_to_all_regions:
             assert len(region_wrappers) == len(nodes_to_add), (
-                "Numer of nodes to add must equal the number of regions"
+                "Number of nodes to add must equal the number of regions"
             )
             for region_wrapper, node in zip(region_wrappers, nodes_to_add):
                 region_wrapper.add(node)
@@ -555,7 +555,7 @@ class NNModuleAttrAccessorInfo:
     # Either the actual name or _parameters/_buffers/_modules
     l1_key: Optional[str] = None

-    # Actual paramter/buffer/submodule name
+    # Actual parameter/buffer/submodule name
     l2_key: Optional[str] = None

@@ -563,7 +563,7 @@ class OutputGraph(OutputGraphGuardsState):

     def install_builtins_dict_in_fglobals(self):
         # f_globals["__builtins__"] can be a dict or a module. This is an
-        # implemenation detail -
+        # implementation detail -
         # https://docs.python.org/3/library/builtins.html.

         # This makes guarding on any builtin messy because the guard check_fn
@@ -2737,7 +2737,7 @@ class SubgraphTracer(fx.Tracer):
         ):
             return self.bound_symbols[example_value.node.expr]

-        # Proxys are associated with VariableTracker.
+        # Proxies are associated with VariableTracker.
         # It is possible that we've already lifted the Proxy to be an input.
         # If that is the case, just return the already lifted Proxy.
         if proxy in self.lifted_freevars:
@@ -2791,7 +2791,7 @@ class SubgraphTracer(fx.Tracer):
         self, example_value, e_proxy: Union[LazyProxy, torch.fx.Proxy]
     ):
         # When binding the symbols in an exmaple_value, we bind the symbols
-        # to the proxy's associatied Tracer instead of current tracer.
+        # to the proxy's associated Tracer instead of current tracer.
         # This is because:
         # 1. We may be calling wrap_tensors during speculate_subgraph because
         # the variables are lazily realized. The proxy are top-level phs but
@@ -103,7 +103,7 @@ class _DynamoCodeCacheEntry:
     ingredients:
       1. The "original" code object, which serves as the entry point for eager
          execution, i.e. the code only executed when there's no cache entry hit.
-      2. The python module name this code object belongs to, for idenfifying the
+      2. The python module name this code object belongs to, for identifying the
          enclosing global scope to inject compiled and resume functions.
       3. A list of function names that pointing to this code object. There could be
         multiple function objects pointing to the same code such as recursive functions.
@@ -147,7 +147,7 @@ class CompilePackage:
     end users. It has the following interface:

     1. `CompilePackage.__init__()` which optionally takes previously serialized dynamo states.
-        a. when `dynamo` argument is None, it will contruct a brand new CompilePackage object.
+        a. when `dynamo` argument is None, it will construct a brand new CompilePackage object.
         b. when `dynamo` argument is not None, it will load a pre-compiled dynamo state.
     2. `package.save()` which dumps the dynamo and backend states to a DynamoCacheEntry object.
     3. `package.install(backends) which will handle all the side-effectful global scope
@@ -79,7 +79,7 @@ class PrecompileContext(CacheArtifactManager):
     # are transferred to _new_cache_artifacts before serialization.
     _new_cache_artifacts_by_key: dict[str, CacheArtifact] = {}
     _new_cache_artifacts: CacheArtifactsResult = defaultdict(list)
-    # Keep a seperate seen artifacts list to make avoid unnecessary duplicates
+    # Keep a separate seen artifacts list to make avoid unnecessary duplicates
     # This list will not be cleared between serialize() calls
     _seen_artifacts: OrderedSet[CacheArtifact] = OrderedSet()
     # When serialize() is called, artifacts are transferred from _cache_artifacts to
@@ -273,7 +273,7 @@ SpeculationLog diverged at index {self.index} (log had {len(self.entries)} entri
 - Expected: {entry.filename}:{entry.lineno} ({entry.inst.opname} at ip={entry.instruction_pointer})
 - Actual: {filename}:{lineno} ({inst.opname} at ip={instruction_pointer})
 {prev_entry_msg}
-There are two usual reasons why this may have occured:
+There are two usual reasons why this may have occurred:
 - When Dynamo analysis restarted, the second run took a different path than
   the first.  If this occurred, the previous instruction is the critical instruction that
   behaved differently.
@@ -653,7 +653,7 @@ def generic_jump(truth_fn: typing.Callable[[object], bool], push: bool):
                             *graph_break_hints.FUNDAMENTAL,
                             "Use `torch._assert()` to raise a hard AssertionError when the check fails. "
                             "This error will propagate back the user code "
-                            "that called the compiled function (i.e. Dynamo wil not trace any exception handling).",
+                            "that called the compiled function (i.e. Dynamo will not trace any exception handling).",
                             "Remove the assert statement.",
                             "Move the assert statement outside of any context managers in order to graph break with "
                             "partial graph compilation (if fullgraph=False).",
@@ -1772,7 +1772,7 @@ class InstructionTranslatorBase(
     def _raise_exception_variable(self, val) -> NoReturn:
         # User can raise exception in 2 ways
         #   1) raise exception type - raise NotImplementedError
-        #   2) raise execption instance - raise NotImplemetedError("foo")
+        #   2) raise exception instance - raise NotImplemetedError("foo")

         # 1) when user raises exception type
         val = self._create_exception_type(val)
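The two spellings named in that comment are plain Python; a small illustration, unrelated to Dynamo itself:

    def raise_by_type():
        # 1) raise the exception type; Python instantiates it with no arguments
        raise NotImplementedError

    def raise_by_instance():
        # 2) raise an exception instance constructed with arguments
        raise NotImplementedError("foo")

    for fn in (raise_by_type, raise_by_instance):
        try:
            fn()
        except NotImplementedError as exc:
            print(type(exc).__name__, exc.args)
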
@@ -1929,7 +1929,7 @@ class InstructionTranslatorBase(
                 self.jump(exn_tab_entry)
             else:
                 # No handler found. Bubble the exception to the parent
-                # instruction translater. We use special exception for this.
+                # instruction translator. We use special exception for this.
                 self.stack.clear()
                 if type(self) is InstructionTranslator:
                     unimplemented_v2(
@@ -1955,7 +1955,7 @@ class InstructionTranslatorBase(
                     self.exn_vt_stack.pop()
                     if len(self.block_stack) == 0:
                         # No handler found in this frame. Bubble the exception to the parent
-                        # instruction translater.
+                        # instruction translator.
                         self.stack.clear()
                         if type(self) is InstructionTranslator:
                             unimplemented_v2(
@@ -2009,7 +2009,7 @@ class InstructionTranslatorBase(
                 self.jump(block_stack_entry)
             else:
                 # No handler found. Bubble the exception to the parent
-                # instruction translater. We use special exception for this.
+                # instruction translator. We use special exception for this.
                 self.stack.clear()
                 if type(self) is InstructionTranslator:
                     unimplemented_v2(
@@ -2116,7 +2116,7 @@ class InstructionTranslatorBase(
                 unimplemented_v2(
                     gb_type="Caught non-Exception value",
                     context=str(exc_instance),
-                    explanation=f"Except expects to recieve an object of Exception type but received {exc_instance}.",
+                    explanation=f"Except expects to receive an object of Exception type but received {exc_instance}.",
                     hints=[*graph_break_hints.USER_ERROR],
                 )
@@ -4101,7 +4101,7 @@ class InliningInstructionTranslator(InstructionTranslatorBase):

 class InliningGeneratorInstructionTranslator(InliningInstructionTranslator):
     generated_items: list[VariableTracker]
-    # Flag wether or not the InlineGenerator should consume the entire iterator
+    # Flag whether or not the InlineGenerator should consume the entire iterator

     def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
@@ -3722,7 +3722,7 @@ Let's illustrate the logic with an example:
         ......

 There are mainly three call sites of check/check_verbose:
-* The compile region entrance (like function f1), the correspoinding code is located at eval_frame.py.
+* The compile region entrance (like function f1), the corresponding code is located at eval_frame.py.
 * When tracing the recursively called functions (like function f2 and f3).
     * Dynamo decides inline/skip every time it encounters a new recursively function call, and the call site
       is in InliningInstructionTranslator.check_inlineable of symbolic_convert.py.
@@ -75,7 +75,7 @@ class ConvertFrameReturn:
     # default return is no compiled code (i.e. `return None`):
     # strategy is to skip non-recursively, for all future intercepted frames too

-    # eval fram execution strategy for this frame
+    # eval frame execution strategy for this frame
     frame_exec_strategy: FrameExecStrategy = dataclasses.field(
         default_factory=lambda: FrameExecStrategy(FrameAction.SKIP, FrameAction.DEFAULT)
     )
@@ -2714,7 +2714,7 @@ from torch._subclasses import UnsupportedFakeTensorException  # noqa: F401
 def get_safe_global_name(tx, root, obj):
     # The global_mangled_class_name should be different for different
     # invocations of torch.compile. Otherwise, we can run into a situation
-    # where multiple torch.compile invocations re-use the same global name,
+    # where multiple torch.compile invocations reuse the same global name,
     # but the global's lifetime is tied to the first invocation (and
     # may be deleted when the first torch.compile invocation is deleted)
     # We mangle it based off of the output_graph's id.
@@ -2977,7 +2977,7 @@ def same(
                     ):
                         # In the presence of noise, noise might dominate our error
                         # metric for smaller tensors.
-                        # Similary, for 1x1 kernels, there seems to be high noise with amp.
+                        # Similarly, for 1x1 kernels, there seems to be high noise with amp.
                         multiplier = 3.0
                     return multiplier
@@ -713,7 +713,7 @@ class VariableBuilder:
                 # 2) For non-constant objects, we also have to guard on the keys
                 # (like TENSOR_MATCH on tensor). We might also have guards on
                 # the attributes of the keys (like tensor.grad). To make this
-                # work in tree strucutre is complicated.
+                # work in tree structure is complicated.
                 #
                 # So, instead we guard on the key order. While guarding on key
                 # order, we just save the indices and use it to access keys and
@@ -1050,7 +1050,7 @@ class VariableBuilder:
             return ItertoolsVariable(value, source=self.source)
         elif is_torch_sym(value):
             # Note: this doesn't handle nested symints.
-            # For SymBool input, we re-use the infra for SymInt by simulating SymBool with a SymInt in dynamo.
+            # For SymBool input, we reuse the infra for SymInt by simulating SymBool with a SymInt in dynamo.

             # Concretely,
             # 1. We create a SymInt in dynamo's shape_env, whose source is constructed as ConvertIntSource(self.source).
@@ -1297,7 +1297,7 @@ class VariableBuilder:
                 )

                 # setting is_unspecialized=False to not insert a as_tensor call in reconstruct by default
-                # seting example to be real value because these example values will be used
+                # setting example to be real value because these example values will be used
                 # as example_inputs for user compiler.
                 proxy.node.meta["grapharg"] = GraphArg(
                     self.source, value, False, None, False, value
@@ -1342,7 +1342,7 @@ class VariableBuilder:
             )

             # setting is_unspecialized=False to not insert a as_tensor call in reconstruct by default
-            # seting example to be real value because these example values will be used
+            # setting example to be real value because these example values will be used
             # as example_inputs for user compiler.
             proxy.node.meta["grapharg"] = GraphArg(
                 self.source, value, False, None, False, fake_script_obj
@@ -1829,7 +1829,7 @@ class VariableBuilder:
                     ):
                         # This means that it is an integer from a NN module.
                         # Dynamo considers nn module int attributes to be static
-                        # (a good heursitic). But a user might want to mark the
+                        # (a good heuristic). But a user might want to mark the
                         # int attribute to be a symint, so track this integer
                         # for recompilation later.
                         recompile_hint = (
@@ -1998,7 +1998,7 @@ class VariableBuilder:
         ):
             # A hot fix for sparse tensors + torch.compile. Support for
             # export + sparsity is being added but we need to create
-            # SPARSE_TENSOR_GUARDS for guards to work propertly.
+            # SPARSE_TENSOR_GUARDS for guards to work properly.
             unimplemented_v2(
                 gb_type="Attempted to wrap sparse Tensor",
                 context="",
@@ -824,7 +824,7 @@ class BuiltinVariable(VariableTracker):

         if inspect.isclass(fn) and (
             issubclass(fn, Exception)
-            # GeneratorExit doens't inherit from Exception
+            # GeneratorExit doesn't inherit from Exception
             # >>> issubclass(GeneratorExit, Exception)
             # False
             or fn is GeneratorExit
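The class-hierarchy fact behind that comment can be checked directly in a Python REPL; GeneratorExit derives from BaseException, not Exception:

    >>> issubclass(GeneratorExit, Exception)
    False
    >>> issubclass(GeneratorExit, BaseException)
    True
    >>> GeneratorExit.__mro__
    (<class 'GeneratorExit'>, <class 'BaseException'>, <class 'object'>)
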
@@ -2244,7 +2244,7 @@ class BuiltinVariable(VariableTracker):
                     # get_fake_val will get the same fake tensor
                     existing_fake_attr = get_fake_value(getattr_var.as_proxy().node, tx)

-                    # same tensor identiy, setattr is a no-op
+                    # same tensor identity, setattr is a no-op
                     mod_setattr = inspect.getattr_static(obj.module_type, "__setattr__")
                     if (
                         existing_fake_attr is assigning_fake_val
@@ -155,7 +155,7 @@ class ContextWrappingVariable(VariableTracker):

 class GenericContextWrappingVariable(UserDefinedObjectVariable):
     # Some methods in ContextWrappingVariable assumes the arguments are
-    # python contants. Which might not always be the case here.
+    # python constants. Which might not always be the case here.
     def __init__(self, cm_obj, **kwargs) -> None:
         assert cm_obj is not None
         super().__init__(
@@ -197,7 +197,7 @@ class GenericContextWrappingVariable(UserDefinedObjectVariable):


 class GradInplaceRequiresGradCtxManagerVariable(ContextWrappingVariable):
-    """represents torch grad requries grad"""
+    """represents torch grad requires grad"""

     @staticmethod
     def create(tx: "InstructionTranslator", target_values, **kwargs):
@@ -144,7 +144,7 @@ class ConstDictVariable(VariableTracker):
         def __init__(self, vt) -> None:
             # We specialize SymNodes
             vt = specialize_symnode(vt)
-            # TODO Temorarily remove to figure out what keys are we breaking on
+            # TODO Temporarily remove to figure out what keys are we breaking on
             # and add proper support for them
             if not is_hashable(vt):
                 raise_unhashable(vt)
@@ -756,7 +756,7 @@ class DefaultDictVariable(ConstDictVariable):
 # footgun, because self method calls in dict will route back to the set
 # implementation, which is almost assuredly wrong
 class SetVariable(ConstDictVariable):
-    """We model a sets as dictonary with None values"""
+    """We model a sets as dictionary with None values"""

     def __init__(
         self,
@@ -778,7 +778,7 @@ class SetVariable(ConstDictVariable):

     @staticmethod
     def _default_value():
-        # Variable to fill in he keys of the dictinary
+        # Variable to fill in he keys of the dictionary
         return ConstantVariable.create(None)

     def as_proxy(self):
@@ -49,7 +49,7 @@ class DistributedVariable(VariableTracker):
     Concrete distributed objects could inherit this class and add object
     specific logic.

-    i.e. It provides the check on the distributed package existance
+    i.e. It provides the check on the distributed package existence
     and hold the tracking value for the corresponding distributed object.
     """
@@ -59,7 +59,7 @@ class DistributedVariable(VariableTracker):
             unimplemented_v2(
                 gb_type="torch.distributed package is not available!",
                 context="",
-                explanation="The PyTorch package doesn't include torch.distributed when builing from source.",
+                explanation="The PyTorch package doesn't include torch.distributed when building from source.",
                 hints=[
                     "Set USE_DISTRIBUTED=1 to enable it when building PyTorch from source."
                 ],
@@ -509,7 +509,7 @@ def _merge_graph_inputs(
         #
         # Note: ideally, dynamo should just create a single proxy for the same attribute of a nn module. But
         # true_branch and false_branch belong to two separate tracing contexts, they may register the same
-        # attribute to top level seperately. This creates two get_attr proxies for the same attribute
+        # attribute to top level separately. This creates two get_attr proxies for the same attribute
         # that have different meta data such as stack_trace (one stack trace for the true_branch,
         # and the other for false_branch). It seems better to discard the proxy explicitly in cond
         # than make dynamo create a single proxy for the same get_attr target.
@@ -580,7 +580,7 @@ def _merge_graph_inputs(
                 if arg in lifted_freevars:
                     old_ph = lifted_freevars[arg].node
                     old_ph.replace_all_uses_with(new_ph)
-                    # replace_all_uses_with doesn't clean users. Clean it mannually so that we could erase it.
+                    # replace_all_uses_with doesn't clean users. Clean it manually so that we could erase it.
                     old_ph.users = {}
                     graph.erase_node(old_ph)
@@ -752,8 +752,8 @@ def speculate_subgraph(

                 # NOTE: [HigherOrderOperator subgraph input ordering]
                 # The input ordering of the higher order ops is determined by the order of
-                # the creatation of the placehoder.
-                # Mannually created inputs are created in validate_args_and_maybe_create_graph_inputs before
+                # the creation of the placeholder.
+                # Manually created inputs are created in validate_args_and_maybe_create_graph_inputs before
                 # speculating subgraph.
                 # During subgraph speculation, we may lift closured tensors and free symbols as inputs,
                 # their ordering is determined by the time they are lifted: earlier lifted ones precede later
@@ -1403,7 +1403,7 @@ class WhileLoopHigherOrderVariable(TorchHigherOrderOperatorVariable):
         )

         # Note: cond_shared and body_shared refer to the same proxy in parent graph
-        # so using either of them is OK. Use cond_shared as it doesnt matter.
+        # so using either of them is OK. Use cond_shared as it doesn't matter.
         additional_lifted_inputs = cond_shared + cond_unique + body_unique

         body_nn_modules = dict(tx.output.nn_modules)
@@ -3229,7 +3229,7 @@ class AutogradFunctionApplyVariable(VariableTracker):
         # Store the invocation as a call
         from torch._functorch.autograd_function import autograd_function_apply

-        # We use speculate_subgraph to get the fwd graph, but it's alway under no grad mode like what eager mode does.
+        # We use speculate_subgraph to get the fwd graph, but it's always under no grad mode like what eager mode does.
         # The fwd outputs (tensor's example_value) need to be inferred from fake tensor prop to get the correct attributes
         # (e.g, tensor.requires_grad), which would be used by downstream Dynamo tracing.
         # Since there can be other ops like Triton kernels, which depends on python dispatcher, we have to enable it.
@@ -1441,7 +1441,9 @@ class NumpyVariable(VariableTracker):
                 and config.use_numpy_random_stream
             ):
                 msg = f"delegate '{func.__qualname__}' to NumPy itself via "
-                msg += f"confg.use_numpy_random_stream={config.use_numpy_random_stream}"
+                msg += (
+                    f"config.use_numpy_random_stream={config.use_numpy_random_stream}"
+                )
                 unimplemented(msg)

             args, kwargs = NumpyNdarrayVariable.patch_args(func.__name__, args, kwargs)
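For context on the `config.use_numpy_random_stream` flag in that message, a hedged sketch (not part of this commit; the function is invented). When the flag is on, Dynamo delegates `np.random` calls to NumPy's own random stream via the graph break shown above instead of tracing them:

    import numpy as np
    import torch
    import torch._dynamo.config as dynamo_config

    # Delegate np.random calls inside compiled code to NumPy itself
    # (via a graph break) rather than tracing them.
    dynamo_config.use_numpy_random_stream = True

    @torch.compile
    def sample(x):
        noise = np.random.randn(*x.shape)  # handled by NumPy's own stream
        return x + torch.from_numpy(noise)

    sample(torch.zeros(3))
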
@@ -868,7 +868,7 @@ class UnspecializedNNModuleVariable(UserDefinedObjectVariable):
         if type(value) is torch.jit._script.RecursiveScriptModule:
             raise Unsupported(
                 "ScriptModules aren't supported in UnspecializedNNModuleVariable"
-                " becuase their .forward function isn't a static member of their type"
+                " because their .forward function isn't a static member of their type"
             )
         if "value_type" in kwargs:
             lazy_value_to_become = getattr(kwargs["value_type"], "cls_to_become", None)
@@ -1054,7 +1054,7 @@ class UnspecializedNNModuleVariable(UserDefinedObjectVariable):
                 # Record if mutations happens on parameters/buffers/modules. The
                 # mutations on these are not tracked by base class
                 # UserDefinedObject vt. This will be used later to graph break
-                # on seeing a paramters() and family calls.
+                # on seeing a parameters() and family calls.
                 # TODO(anijain2305) - This might not be needed if we let Dynamo
                 # inline both getattr and setattr. In that case, it should see
                 # the lowest level dicts - _parameters and family and
@@ -1130,7 +1130,7 @@ class UnspecializedNNModuleVariable(UserDefinedObjectVariable):

         # For non-empty hook dicts, one way is to just fallback to VariableTracker.build() and create a ConstDictVariable.
         # However, ConstDictVariable guards on keys. This can cause recompiles when the same hook is installed for
-        # differnt nn module instances, because the key keeps changing (look more into RemovableHandle to understand why
+        # different nn module instances, because the key keeps changing (look more into RemovableHandle to understand why
         # key changes - also related https://github.com/pytorch/pytorch/issues/125836). Here, we carefully craft a
         # NNModuleHooksDictVariable (a subclass of ConstDictVariable) to avoid any guard on the keys.
         if (
@@ -1264,7 +1264,7 @@ If the above doesn't work, please subtmit an issue to GitHub.
             # Guard against inplace view op on input tensor (not supported)
             if args and isinstance(args[0], variables.TensorVariable):
                 tensor_var = args[0]
-                # Check if input tensor and inplace_view op specifcally
+                # Check if input tensor and inplace_view op specifically
                 if tensor_var.source is not None and hasattr(torch.ops.aten, name):
                     fn = getattr(torch.ops.aten, name)
                     if (
@@ -1528,7 +1528,7 @@ Either create the tensor outside the compiled region, or do not set the tensor t
         # Alternate version if we have a .source
         varname = tx.output.new_var()

-        # construct the nn.Parmeter before the graph save it to varname
+        # construct the nn.Parameter before the graph save it to varname
         cg = PyCodegen(tx)
         cg.add_push_null(lambda: cg.load_import_from("torch.nn", "Parameter"))
         cg(data.source)
@@ -368,7 +368,7 @@ class TorchFunctionModeVariable(GenericContextWrappingVariable):
         # We are able to trace custom modes but if there are graph breaks under them
         # and they have a custom __enter__/__exit__ we don't handle this for the
         # same reason we don't handle generic context managers: there may be side effects
-        # that are now affected by executing the funtion across two frames instead of one
+        # that are now affected by executing the function across two frames instead of one
         # Today we support the enter/exit of the default TorchFunctionMode as well as
         # DeviceContext (which is used for set_default_device)
         return issubclass(ty, (NoEnterTorchFunctionMode, DeviceContext)) or (
@@ -791,7 +791,7 @@ class UserDefinedObjectVariable(UserDefinedVariable):
         self.base_cls_vt = base_cls_vt
         self.init_args = init_args

-        # This records names of the attributes that were modifed via instance
+        # This records names of the attributes that were modified via instance
         # `__dict__` directly, rather than the normal setattr path.
         #
         # TODO consider emulating `obj.__dict__` as a `ConstDictVariable` to get