[BE][8/16] fix typos in torch/ (torch/csrc/jit/) (#156318)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156318
Approved by: https://github.com/albanD
Xuehai Pan
2025-07-03 02:11:53 +08:00
committed by PyTorch MergeBot
parent c0e155a8d2
commit 541584d22e
45 changed files with 76 additions and 77 deletions

View File

@ -1177,7 +1177,6 @@ exclude_patterns = [
'torch/distributed/tensor/**',
'torch/[j-o]*/**',
'torch/utils/**',
'torch/csrc/jit/**',
'torch/csrc/jit/[a-o]*/**',
]
init_command = [

View File

@ -16,7 +16,7 @@ nin
nout
NowNs
optins
OT
ot
overrideable
padD
ptd

View File

@ -958,7 +958,7 @@ torch._C._jit_set_fusion_strategy([
])
```
This will make two attempts to generate static-shape graphs, and after that fall back to generating dynamic-shape graphs. If for some reason compilation keeps occuring (even with dynamic-shape graphs - e.g. this could happen if ranks or dtypes vary), after 20 compilation attempts the graph executor will fall back to running the graph without any attempts to compile it.
This will make two attempts to generate static-shape graphs, and after that fall back to generating dynamic-shape graphs. If for some reason compilation keeps occurring (even with dynamic-shape graphs - e.g. this could happen if ranks or dtypes vary), after 20 compilation attempts the graph executor will fall back to running the graph without any attempts to compile it.
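As a concrete sketch of the strategy list described above (the pair values simply mirror the two static and twenty dynamic attempts mentioned in the text; this snippet is editorial and not part of the diff):

```python
import torch

# Two specializations on static shapes, then up to 20 dynamic-shape
# compilations; after that the executor runs the graph without compiling.
torch._C._jit_set_fusion_strategy([("STATIC", 2), ("DYNAMIC", 20)])
```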
### Pre-derivative Optimization ###

View File

@ -26,5 +26,5 @@ A brief summary of the source tree:
**Refer** to each folder for more in-depth documentation.
Other relevant parts of the codebase not contained here:
- [aten/src/ATen/core](../../../aten/src/ATen/core): contains JIT code re-used by other elements of the
- [aten/src/ATen/core](../../../aten/src/ATen/core): contains JIT code reused by other elements of the
runtime system (eager, mobile, etc.)

View File

@ -319,7 +319,7 @@ static void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
}
static bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
// Cutoff chosed by benchmarking on a TITAN V
// Cutoff chose by benchmarking on a TITAN V
return other_side_input.numel() <= 1024 * 2048;
}
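As an aside on the cutoff in this hunk, a quick back-of-the-envelope check of its magnitude (my own arithmetic, not part of the change):

```python
import torch

# 1024 * 2048 = 2_097_152 elements, i.e. 8 MiB at float32. Tensors at or
# below this size count as "fast" for the non-matmul side of the tree reduce.
other_side_input = torch.randn(1024, 2048)
assert other_side_input.numel() == 1024 * 2048 == 2_097_152
assert other_side_input.numel() * 4 == 8 * 1024 * 1024  # bytes at float32
```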

View File

@ -96,7 +96,7 @@ static bool isBefore(Node* n1, Node* n2) {
}
}
// Now they are the same numer of blocks from the graph block,
// Now they are the same number of blocks from the graph block,
// recurse upwards, checking if they are on the same block
while (true) {
if (n1->owningBlock() == n2->owningBlock()) {

View File

@ -98,7 +98,7 @@ void InplaceMKLDNNSubgraph(const std::shared_ptr<Graph>& graph) {
// This function first calculates aliasing sets,
// then calculates the last node each aliasing set is alive for.
// Then we go through each node, if it's a node which has an equivalent
// inplace node and the aliasing set for its input is dead afer this node, we
// inplace node and the aliasing set for its input is dead after this node, we
// inplace it. Then we merge the aliasing sets for the input and output of the
// node and extend the liveness of the set. To inplace a node you need to
// prove device and dtype of the input and output are the same, which we've
@ -812,7 +812,7 @@ void ComputeSubgraphInMKLDNN(Node* subgraph_node) {
if (body_node->kind() == aten::conv2d ||
body_node->kind() == aten::conv3d) {
// this node doesnt handle string padding yet...
// this node doesn't handle string padding yet...
if (!body_node->namedInput("padding")->type()->cast<StringType>()) {
body_node->replaceWithNewSymbol(Symbol::prim("mkldnn_convolution"));
body_node->destroy();

View File

@ -167,7 +167,7 @@ std::shared_ptr<Graph> ToONNX(
ConstantValueMap::ClearMaps();
auto new_graph = std::make_shared<Graph>(graph->current_scope());
py::dict env;
// Kept identical to values in env. Used for constant-time existance check.
// Kept identical to values in env. Used for constant-time existence check.
py::set values_in_env;
try {
BlockToONNX(

View File

@ -17,7 +17,7 @@ namespace torch::jit {
// information. Shape and type information is only available after
// _jit_pass_onnx, which converts aten nodes to onnx nodes. So there is a
// interdependent issue. _jit_pass_onnx depends on preprocess passes to convert
// aten nodes into convertable condition, and preprocess passes depend on
// aten nodes into convertible condition, and preprocess passes depend on
// _jit_pass_onnx to convert upstream nodes and apply onnx shape inference.
// Separating the pass into two parts breaks the interdependency.
//

View File

@ -116,7 +116,7 @@ static std::vector<std::string> _single_input_general_shape_aten_funcs = {
"__getitem__",
};
// Theses are prim::CallFunctions for ops that doesn't require observation and
// These are prim::CallFunctions for ops that doesn't require observation and
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// TODO: [Need verify] looks like we can quantize simple functionals that just
@ -136,7 +136,7 @@ static std::vector<std::string> _single_input_general_value_call_funcs = {
"leaky_relu",
};
// Theses are aten functions for ops that doesn't require observation and
// These are aten functions for ops that doesn't require observation and
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// e.g. `aten::avg_pool2d(%input_tensor, ...)`

View File

@ -1702,7 +1702,7 @@ Module InsertObserversForOnDevicePTQ(
// you will have multiple getattrs for the same attribute and thus potentially
// multiple observers observing the same value. This will also lead to
// increased size of the packed param struct. I dont expect this to be a
// common pattern but something to be aware fo Note that current quant
// common pattern but something to be aware of Note that current quant
// workflow does not prevent this anyway since during inset quant dequant
// things are inlined anyway
helper.fillBoundaryValueMap(cloned_module, observer_method_name);

View File

@ -1622,7 +1622,7 @@ void InsertQuantDeQuantHelper::insertCalculateQParamsAndQuantizationOps(
void InsertQuantDeQuantHelper::runForOnDevicePTQ(
Module& module,
const std::string& method_name) {
// In all likelihood this really wont do anything because we expect that
// In all likelihood this really won't do anything because we expect that
// the input method for quantization's prepare step will be inlined. Thus
// only call methods we will see will belong to observer's forward calls.
for (auto& invoked_methods : getInvokedMethods(module, method_name)) {
@ -1834,8 +1834,8 @@ Module InsertQuantDeQuantOnDevicePTQ(
// ReplicateChooseQParamsQuantDequant: This is propagating dynamic quant's
// quant dequant RemoveRedundantQuantizationOps: THis is removing activation
// observers for dynamic quant when the op related to it is not dynamically
// quantizable. Doesnt really make sense. In our case we wont have those
// anyway since for dynamic quant activations wont be observed We can still
// quantizable. Doesn't really make sense. In our case we won't have those
// anyway since for dynamic quant activations won't be observed We can still
// use this function because the above two methods should really be a noop
h.propagateQuantizationOps(module);
return module;

View File

@ -206,7 +206,7 @@ QuantFusionInfo getFixedQParamOpFusionInfo(
%r = )";
op_pattern += op_name + "(" + "%a_dequant" + extra_op_arg_list + ")";
// IR pattern common to all ops with fixed quantization parameters for
// asymetric quantization
// asymmetric quantization
std::string asym_fixed_qparam_op_suffix = R"(
%r_scale : float = prim::Constant[value=0.00390625]()
%r_zero_point : int = prim::Constant[value=0]()
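An aside on the constant in this pattern: 0.00390625 is 1/256, which together with zero_point 0 spans [0, 255/256] in quint8, a natural fit for fixed-qparam ops whose outputs lie in [0, 1). A hedged sketch (the choice of sigmoid here is illustrative, not taken from the diff):

```python
import torch

x = torch.randn(4)
scale, zero_point = 1 / 256, 0  # 1/256 == 0.00390625, the constant in the IR pattern above
q = torch.quantize_per_tensor(torch.sigmoid(x), scale, zero_point, torch.quint8)
print(q.int_repr())     # quantized integer codes
print(q.dequantize())   # their reconstructed float values
```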

View File

@ -8,7 +8,7 @@ namespace torch::jit {
struct TORCH_API CanonicalizedSymbolicShape {
// TODO: Consider in the future if it is reasonable to
// merge code with SymbolicShape or VaryingShape while keeping
// the two not implicitly convertable (and cause bugs).
// the two not implicitly convertible (and cause bugs).
CanonicalizedSymbolicShape(
const c10::SymbolicShape& orig_shape,
std::unordered_map<int64_t, int64_t>& ss_map) {

View File

@ -396,7 +396,7 @@ void insertTypeGuard(
namespace {
bool has_unsupported_pin_memory(const Node* node) {
// cant support non-constant pin_memory or pin_memory = True
// can't support non-constant pin_memory or pin_memory = True
if (auto maybe_index = node->schema().argumentIndexWithName("pin_memory")) {
int index = *maybe_index;
auto inp = node->input(index);

View File

@ -66,7 +66,7 @@ TORCH_API bool isSupported(Node* node);
/// work with dynamic shapes unless explicitly register the shape function via
/// `torch::jit::RegisterShapeComputeGraphForSchema` for the custom operator.
///
/// @return Reference of the custome operator set
/// @return Reference of the custom operator set
///
TORCH_API OperatorSet& getCustomOperatorSet();

View File

@ -62,7 +62,7 @@ struct ValueMapper {
auto new_outputs = merged_node->outputs();
for (Value* v : new_outputs) {
auto maybe_last_use = firstOrLastUse(v, /*find_first*/ false);
// if it doesnt have a use it shouldnt have been added as output
// if it doesn't have a use it shouldn't have been added as output
TORCH_INTERNAL_ASSERT(maybe_last_use);
const Use last_use = *maybe_last_use;

View File

@ -2316,7 +2316,7 @@ void initJITBindings(PyObject* module) {
// Throw errors when calling wait() on the returned Future if
// any of the original futures would throw.
// NB: PythonFutureWrapper takes an unwrap_func which serves as a
// callback to evalute the value in the Future. RPC uses this
// callback to evaluate the value in the Future. RPC uses this
// unwrap_func to check whether the returned py::object is a
// RemoteException object, and re-throw the exception if it is.
// By extracting the c10::ivalue::Future from PythonFutureWrapper

View File

@ -809,7 +809,7 @@ std::pair<std::shared_ptr<Operator>, Stack> getOpWithStack(
}
// This function is used to check if the schema is valid for the given args and
// kwargs. It checks script object by checking wether the FakeScriptObject is
// kwargs. It checks script object by checking whether the FakeScriptObject is
// an instance of the corresponding fake class for the actual class used in
// schema.
bool checkSchemaAllowFakeScriptObject(

View File

@ -649,7 +649,7 @@ inline InferredType tryToInferContainerType(
"."));
} else {
// TODO: this message is not correct anymore, since this InferredType is
// used from a bunch of circumstances unrelated to tracing. We can re-use
// used from a bunch of circumstances unrelated to tracing. We can reuse
// this instead of the attribute_failure stuff in concreteType
return InferredType(c10::str(
"Only tensors and (possibly nested) tuples of tensors, lists, or dicts ",

View File

@ -99,7 +99,7 @@ struct C10_EXPORT ConcretePyObjectHolder final : PyObjectHolder {
py_obj_.ptr() = nullptr;
}
// explicit construction to avoid errornous implicit conversion and
// explicit construction to avoid erroneous implicit conversion and
// copy-initialization
explicit ConcretePyObjectHolder(py::object py_obj)
: py_obj_(std::move(py_obj)) {}

View File

@ -1223,7 +1223,7 @@ std::shared_ptr<SugaredValue> toSugaredValue(
obj.ptr() == py::module::import("torch.jit").attr("isinstance").ptr()) {
return SpecialFormValue::create(prim::isinstance);
#ifdef USE_RPC
// RPC module is only avaialble when build flag "USE_DISTRIBUTED" is on.
// RPC module is only available when build flag "USE_DISTRIBUTED" is on.
} else if (
isRpcAvailable &&
obj.ptr() ==
@ -1236,7 +1236,7 @@ std::shared_ptr<SugaredValue> toSugaredValue(
return SpecialFormValue::create(prim::rpc_sync);
} else if (
isRpcAvailable &&
// RPC module is only avaialble when build flag "USE_DISTRIBUTED" is on.
// RPC module is only available when build flag "USE_DISTRIBUTED" is on.
obj.ptr() ==
py::module::import("torch.distributed.rpc").attr("remote").ptr()) {
return SpecialFormValue::create(prim::rpc_remote);

View File

@ -68,7 +68,7 @@ struct VISIBILITY_HIDDEN PythonValue : public SugaredValue {
ErrorReport(loc)
<< kind() << " cannot be used as a value. "
<< "Perhaps it is a closed over global variable? If so, please "
<< "consider passing it in as an argument or use a local varible "
<< "consider passing it in as an argument or use a local variable "
<< "instead.");
}

View File

@ -89,7 +89,7 @@ std::pair<std::shared_ptr<Graph>, Stack> createGraphByTracingWithDict(
};
// The argument_names parameter is parsed in python and its order
// is the same as the arguments' decalaration order in forward() method.
// is the same as the arguments' declaration order in forward() method.
// These name shall be added to the graph as debug name and the order
// should align with the traceable stack we generated by the python dict.
std::vector<std::string> compact_argument_names;
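The dict-driven tracing path this code backs is reachable from Python through torch.jit.trace's example_kwarg_inputs argument in recent releases; a minimal sketch (the module and shapes are invented for illustration):

```python
import torch

class M(torch.nn.Module):
    def forward(self, x, y):
        return x + y

m = M()
# Keyword inputs are matched to forward()'s declared argument order,
# which is what the argument_names bookkeeping above preserves.
traced = torch.jit.trace(
    m, example_kwarg_inputs={"x": torch.randn(2), "y": torch.randn(2)}
)
print(traced.graph)
```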

View File

@ -55,7 +55,7 @@ C10_DEFINE_bool(
C10_DEFINE_bool(
torch_jit_enable_expanded_stacks,
false,
"When true we will attemps to pre-expand node stacks and cache expanded stacks.")
"When true we will attempts to pre-expand node stacks and cache expanded stacks.")
C10_DEFINE_bool(
torch_jit_expanded_stacks_mangled,

View File

@ -18,7 +18,7 @@ struct TORCH_API JITException : public std::runtime_error {
return python_class_name_;
}
// the original msg if this is from a python exception. The interpretor has
// the original msg if this is from a python exception. The interpreter has
// changed the original message by adding "The following operation failed in
// the TorchScript interpreter." in front of it in the handleError function.
std::optional<std::string> getOriginalMsg() const {

View File

@ -115,8 +115,8 @@ bool isSortableListOfObjectsOrTuples(
}
auto type = ivalues.get(0).type();
// We assume lists have homogenous types, use first element to determine
// best sorting methods. If in the future we need to support heterogenous
// We assume lists have homogeneous types, use first element to determine
// best sorting methods. If in the future we need to support heterogeneous
// types inside list, then sorting needs to have runtime sortable checks.
const size_t n = ivalues.size();
for (const auto i : c10::irange(n)) {
@ -1141,7 +1141,7 @@ static const std::vector<OperatorGeneratorArgs> opGenArgs{
//
// create a clone of these declarations with a _hacked_twin overload name
// and nullability scrubbed from TensorList arg types
// TOOD find out why this exists and how to do it without the hack
// TODO find out why this exists and how to do it without the hack
//
OperatorGeneratorArgs(
TORCH_SELECTIVE_SCHEMA(
@ -2839,7 +2839,7 @@ void hashValue(Stack& stack) {
}
static const std::vector<OperatorGeneratorArgs> opGenArgs2{
// registered as Any[] so that heterogenous tuples can be called with len()
// registered as Any[] so that heterogeneous tuples can be called with len()
OperatorGeneratorArgs(
TORCH_SELECTIVE_SCHEMA("aten::len.any(Any[] a) -> int"),
listLen,

View File

@ -3204,7 +3204,7 @@ def _batch_norm_with_update(input: List[int],
)=====")
+ std::string(R"=====(def broadcast_inplace(a: List[int],
b: List[int]) -> List[int]:
_0 = "The dims of tensor b ({}) must be less than or equal tothe dims of tensor a ({}) "
_0 = "The dims of tensor b ({}) must be less than or equal to the dims of tensor a ({}) "
_1 = "The size of tensor a {} must match the size of tensor b ({}) at non-singleton dimension {}"
dimsA = torch.len(a)
dimsB = torch.len(b)
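The error message corrected here guards in-place broadcasting, where the right-hand operand cannot have more dimensions than the in-place target and must broadcast into it. A small illustration (shapes chosen arbitrarily):

```python
import torch

a = torch.zeros(2, 3)
a.add_(torch.ones(3))  # fine: b's single dim broadcasts into a's (2, 3)
try:
    a.add_(torch.ones(4, 2, 3))  # b has more dims than a, so it is rejected
except RuntimeError as e:
    print(e)
```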

View File

@ -71,7 +71,7 @@ Runtime instances in your code.
Static runtime's memory planner does two things:
1) Coalesces internal allocations for tensor storage
2) Does static analysis to figure out how to efficiently re-use memory.
2) Does static analysis to figure out how to efficiently reuse memory.
### Standard Resizing
Static runtime will record the space required for each intermediate managed tensor it sees

View File

@ -70,7 +70,7 @@ TORCH_API inline bool borrowsOutputs(c10::Symbol kind) {
// The output aliases that end up here are as a result of aliasDb failing to
// recognize them as outputs due to collection object (e.g., Tuple) aliasing
// inputs.
// Values that dont't show up in output_aliases or external_aliases are created
// Values that don't show up in output_aliases or external_aliases are created
// and consumed within the graph.
class ValueGroup {
public:
@ -111,7 +111,7 @@ class TORCH_API ManagedTensorRanges {
// If true, then this node is the last use of at least one
// managed tensor. availableTensorValuesAfterNode(node) will return a vector
// of the managed tensors that are available for re-use
// of the managed tensors that are available for reuse
// in the nodes following this one.
bool nodeFreesManagedTensors(Node* node) const;
const std::vector<const Value*>& availableTensorValuesAfterNode(
@ -141,7 +141,7 @@ class TORCH_API ManagedTensorRanges {
void extendInputLifetime(Node* node, size_t new_end);
// Maps Node* to the set of managed tensors that are now available
// for re-use after this node.
// for reuse after this node.
c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_{};
// Maps each Value* to its lifetime (start node index, end node index)
c10::FastMap<const Value*, Lifetime> value_lifetimes_{};

View File

@ -76,7 +76,7 @@ std::vector<StorageGroup> assignStorageToManagedTensors(
// This set maps each Value* to its assigned storage group.
c10::FastMap<const Value*, size_t> storage_group_mapping;
// On each iteration, this vector stores the set of storage groups that
// are available for re-use.
// are available for reuse.
std::vector<size_t> free_storage_groups;
auto makeNewStorageGroup = [&](const Value* value) {

View File

@ -529,7 +529,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::to, aten_to, [](Node* n) -> SROperator {
const auto in1_i = p_node->Input(1).toOptional<at::ScalarType>();
const auto in2_i = p_node->Input(2).toBool();
const auto in3_i = p_node->Input(3).toBool();
// To mimick the behavior of the JIT interpreter, if both dtype
// To mimic the behavior of the JIT interpreter, if both dtype
// and copy are not set, we return self. Otherwise, we assume
// that dtype is set.
if (!in1_i && !in3_i) {

View File

@ -214,7 +214,7 @@ struct TORCH_API BytecodeEmitMode {
// true: instruction of default argument values (like LOADC) is emitted.
// false: instruction of default argument values are not emitted. Instead
// they are fetched from operator schema.
// default_args_before_out_args (to forward compatibile support
// default_args_before_out_args (to forward compatible support
// operators allowing out arguments and default arguments):
// true: the number of specified arguments will deserialized to (#all_args -
// #default_args). false: the number of specified arguments will deserialized to

View File

@ -131,7 +131,7 @@ std::string get_named_tuple_str_or_default(
// str() return "Tensor" and repr_str() return "Tensor (inferred)". If
// it's not inferred type, str() return "Tensor[]" and repr_str()
// return "Tensor". In cpp, repr_str() will always return "Tensor"
// regardless inferred type. When exporing custom type in bytecode,
// regardless inferred type. When exporting custom type in bytecode,
// "Tensor" is the preferred way to deserialize Tensor type
std::string named_tuple_type_str = it->is_inferred_type()
? named_tuple_type->str()
@ -554,7 +554,7 @@ void ScriptModuleSerializer::writeArchive(
}
WriteableTensorData writable_td = getWriteableTensorData(td);
if (use_storage_context && serialized_tensors.count(tensor_name)) {
// storage has been serialzed already, skip
// storage has been serialized already, skip
continue;
}
writer_.writeRecord(
@ -698,10 +698,10 @@ void ScriptModuleSerializer::writeByteCode(
// debug handles.
// The reason we save debug handles conditionally is so that
// we dont end up with a model that has debug handles but has not
// debug map to correlate debug handels with.
// debug map to correlate debug handles with.
// Once we have a model with both handles and debug map, we can
// strip off debug map and have a lean model served to production.
// If exception ocurrs we have a model with debug map that can be
// If exception occurs we have a model with debug map that can be
// used to symbolicate debug handles
writeArchive(
debug_info_telements,

View File

@ -212,7 +212,7 @@ struct PythonPrintImpl {
// and would appear in the same order when the expression tree is
// reparsed.
// The last case can be checked
// because when we emit a expresion tree in the parser,
// because when we emit a expression tree in the parser,
// we do a left-to-right postorder traversal of the expression tree (emit
// children, then emit op). The reverse of this is a right-to-left preorder
// traversal of the tree. By doing a right-to-left preorder traversal of the
@ -222,12 +222,12 @@ struct PythonPrintImpl {
// expression.
// The inductive step is that the right-most input should be produced by the
// node immediatly before the current node if it is in tree order.
// node immediately before the current node if it is in tree order.
bool canInline(Value* v) {
Node* n = v->node();
// there must be only 1 values, otherwise we need an assignment to handle
// the multiple outout values
// the multiple output values
if (n->outputs().size() != 1)
return false;
// if it is used more than once, then we need a variable
@ -651,7 +651,7 @@ struct PythonPrintImpl {
// [reordering of inlines]
// We inline anything that is semantically legal to inline, but sometimes
// we find that these lines get too long. In that case we break the lines
/// and it is important that we un-inline all the inputs preceeding the long
/// and it is important that we un-inline all the inputs preceding the long
/// input:
// r = foo(x.add_(b), some_long + expression)
// wrong!
@ -1410,7 +1410,7 @@ struct PythonPrintImpl {
enforce_importable_(enforce_importable) {}
void printClass(const ClassTypePtr& classType) {
// If any of the methods are not Graph funtions, this indicates that
// If any of the methods are not Graph functions, this indicates that
// this class is a custom-bound C++ class. Skip serialization
// of this class, we will depend on the ClassType being defined
// in the target process.

View File

@ -44,7 +44,7 @@ void restoreAccurateTypeTags(const IValue& root, const TypePtr& type_tag) {
to_process.pop_back();
// ensure we only scan each pointer value once, otherwise this
// can become exponential (and if we allow recursive data in the future,
// it would not terminiate).
// it would not terminate).
if (w.value.isPtrType()) {
const void* key = w.value.internalToPointer();
auto it = scanned.find(key);
@ -490,7 +490,7 @@ PickleOpCode Unpickler::readInstruction() {
stack_.size(),
" and start index is ",
start,
", but stack_ is iterated by two elemenst at a time");
", but stack_ is iterated by two elements at a time");
for (size_t i = start; i < stack_.size(); i += 2) {
dict.insert_or_assign(stack_[i], stack_[i + 1]);
}
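The loop above, and the error message being corrected, reflect an interleaved key/value layout on the unpickler stack (as in pickle's SETITEMS handling); in Python terms:

```python
# Interleaved [k0, v0, k1, v1, ...] slice consumed two elements at a time,
# mirroring the C++ loop above.
stack = ["a", 1, "b", 2]
start = 0
d = {stack[i]: stack[i + 1] for i in range(start, len(stack), 2)}
assert d == {"a": 1, "b": 2}
```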

View File

@ -1437,7 +1437,7 @@ void nnc_aten_embedding(
r = at::embedding(weight, indices);
} catch (...) {
}
// TODO: have to copy output because at::embedding doesnt have an out
// TODO: have to copy output because at::embedding doesn't have an out
// variant and NNC's external calls don't support allocations
memcpy(buf_data[0], r.const_data_ptr(), r.element_size() * r.numel());
}

View File

@ -125,7 +125,7 @@ Dtype Intrinsics::IntrinsicsDtype(
IntrinsicsOp op_type,
const std::vector<ExprPtr>& params) {
// TODO: check the op_type and make a real decision
// Doesnt this fail with kRand?
// Doesn't this fail with kRand?
if (params.empty()) {
throw malformed_input("invalid params in Intrinsics");
} else if (params.size() == 1) {

View File

@ -930,7 +930,7 @@ ExprPtr PolynomialTransformer::mutate(const MulPtr& v) {
variable = lhs_new;
}
// Handle special case mul by 1 since thats safe for floating point, even if
// Handle special case mul by 1 since that's safe for floating point, even if
// it's Nan/Inf.
if (scalar && immediateEquals(scalar, 1)) {
auto c = alloc<Cast>(v->dtype(), variable);
@ -1105,8 +1105,8 @@ ExprPtr PolynomialTransformer::mutate(const DivPtr& v) {
return lhs_new;
}
// If numberator and denominator are equal the result is 1.
// Unless the demoninator could be zero.
// If numerator and denominator are equal the result is 1.
// Unless the denominator could be zero.
// if (hasher_.hash(lhs_new) == hasher_.hash(rhs_new)) {
// return getImmediateByType(v->dtype(), 1);
// }
@ -1745,7 +1745,7 @@ ExprPtr TermExpander::mutate(const TermPtr& v) {
std::vector<ExprPtr> vars;
std::vector<ExprPtr> multilaneVars;
// Assume we can reorder here because we wont merge floating terms.
// Assume we can reorder here because we won't merge floating terms.
ExprPtr lastNode{nullptr};
for (const auto& var : v->variables()) {
ExprPtr node = var->accept_mutator(this);
@ -1830,7 +1830,7 @@ static ExprPtr polyGCD(const PolynomialPtr& poly) {
ExprPtr scalar = poly->scalar();
const std::vector<TermPtr>& variables = poly->variables();
// We ony want to factorize if we're saving complete operations, i.e. no
// We only want to factorize if we're saving complete operations, i.e. no
// value in factorizing 6x + 4y into 2 * (3x + 2y) since we don't save work.
int opsSaved = 1; // default to saving the scalar.
long GCD = std::abs(immediateAs<long>(scalar));
@ -2088,7 +2088,7 @@ static ExprPtr simplifyRoundModPattern(const PolynomialPtr& poly) {
// TODO: for now don't attempt partial factorization of this
// optimization. E.g. it's possible to do: 2 * (x/y) * y + (x%y) => x +
// (x/y) * y but unsure thats actually much better, particularly with
// (x/y) * y but unsure that's actually much better, particularly with
// CSE.
if (!immediateEquals(
evaluateOp(alloc<Sub>(r->scalar(), m->scalar())), 0)) {
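The rewrite sketched in the comment above leans on the integer round-trip identity (x / y) * y + (x % y) == x; a quick sanity check (Python's floor-division/modulo pair satisfies the same relation as C-style truncating division):

```python
# Sanity-check the identity behind simplifyRoundModPattern.
for x in range(-8, 9):
    for y in (1, 2, 3, 5, 7):
        assert (x // y) * y + x % y == x
```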

View File

@ -1263,11 +1263,11 @@ Tensor TensorExprKernel::convertSymbolicOutputToCorrectStrides(
const std::vector<size_t>& sorted_stride_indices_descending,
const std::vector<ExprPtr>& strides,
BufPtr& buf) {
// We need to convert the output tensor so that its values are layed
// We need to convert the output tensor so that its values are laid
// so that when viewed from the output strides the values are correct.
// A contiguous Tensor of size(2, 3) with values 0-5 is layed out as:
// A contiguous Tensor of size(2, 3) with values 0-5 is laid out as:
// [0] [1] [2] [3] [4] [5]
// The same valued tensor with strides (1, 2) would be layed out like
// The same valued tensor with strides (1, 2) would be laid out like
// [0] [3] [1] [4] [2] [5]
// When we are doing the re-ordering of values into the output tensor,
// we are iterating per-element of the input, and we are fixed
@ -1378,7 +1378,7 @@ Tensor TensorExprKernel::convertStaticShapeOutputToCorrectStrides(
tt->strides().concrete_sizes(),
buildErrorMessage("Output strides are unknown."));
const std::vector<int64_t> strides = *tt->strides().concrete_sizes();
// All Tensors in NNC are layed out in default, contiguous layout.
// All Tensors in NNC are laid out in default, contiguous layout.
// If the output is also default contiguous we don't need to do anything
if (strides == default_strides) {
return Tensor(buf, nullptr);
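The layout illustration a few lines up (values 0 through 5 with strides (1, 2) stored as [0] [3] [1] [4] [2] [5]) can be reproduced directly; a small sketch:

```python
import torch

a = torch.arange(6, dtype=torch.float32).reshape(2, 3)      # contiguous storage: 0 1 2 3 4 5
b = torch.empty_strided((2, 3), (1, 2), dtype=torch.float32)
b.copy_(a)                                                   # same logical values as `a`
assert torch.equal(a, b)
# Reading b's storage in physical order shows the strided layout from the comment:
print(torch.as_strided(b, (6,), (1,)))                       # tensor([0., 3., 1., 4., 2., 5.])
```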

View File

@ -780,7 +780,7 @@ void LLVMCodeGenImpl::emitKernel(
GRAPH_DEBUG("\nLLVM generated assembly code\n\n", asmCode_, "\n");
}
// TODO: The binary ops are copypasta.
// TODO: The binary ops are copypaste.
void LLVMCodeGenImpl::visit(const AddPtr& v) {
v->lhs()->accept(this);
@ -878,7 +878,7 @@ void LLVMCodeGenImpl::visit(const OrPtr& v) {
bool rfp = rhs->getType()->isFPOrFPVectorTy();
if (!lfp && !rfp) {
value_ = irb_.CreateOr(lhs, rhs);
value_ = irb_.CreateOr(lhs, rhs); // codespell:ignore
} else {
throw malformed_input("llvm_codegen: bad type in Or", v);
}
@ -1225,7 +1225,7 @@ void LLVMCodeGenImpl::visit(const CastPtr& v) {
}
value_ = irb_.CreateFPCast(value_, dstType);
} else if (dstType->isIntOrIntVectorTy()) {
// Strictly casting from Float -> i8 doesnt give correct results
// Strictly casting from Float -> i8 doesn't give correct results
// set one bit true if the input float is not 0
if (v->dtype().scalar_type() == ScalarType::Bool) {
llvm::Value* zero =

View File

@ -987,7 +987,7 @@ void LoopNest::inlineIntermediateBufs(bool allow_duplicated_work) {
}
}
// all bufs will have at least one store (if they have > 1 they cant be
// all bufs will have at least one store (if they have > 1 they can't be
// inlined anyway)
size_t reads = uses.size() - 1;
// if only one read, we can inline it without duplicating work
@ -1843,11 +1843,11 @@ bool LoopNest::hasLoopCarriedDependence(const ForPtr& loop) {
auto bLoads = NodeFinder<Load>::find(*it2);
// ReadAfterWrite
for (auto& aStore : aStores) {
for (auto& bLoad : bLoads) {
for (auto& bLoad : bLoads) { // codespell:ignore
if (aStore->buf() == bLoad->buf()) {
if (!areIndicesLoopIndependent(
aStore->indices(), bLoad->indices(), outer_loop_vars)) {
if (isOverlapping(analyzer, aStore, bLoad)) {
if (isOverlapping(analyzer, aStore, bLoad)) { // codespell:ignore
return true;
}
}

View File

@ -240,7 +240,7 @@ class TORCH_API MemDependencyChecker : public IRVisitor {
std::unordered_set<std::shared_ptr<AccessInfo>> accessesWithin(
const StmtPtr& A) const;
// TODO: this will return only the AccessInfo for A. It's included for
// completeness but be aware it wont return accesses used in the computation
// completeness but be aware it won't return accesses used in the computation
// of A.
std::unordered_set<std::shared_ptr<AccessInfo>> accessesWithin(
const ExprPtr& A) const;

View File

@ -225,7 +225,7 @@ void RegisterizerAnalysis::visit(const ForPtr& v) {
// possible that an access at a higher scope could "unhide" the
// conditional access, in which case we need to hoist. If there is no
// access to this element at a higher scope then we cannot safely hoist.
// We cannot know at this level whether that will or wont occur.
// We cannot know at this level whether that will or won't occur.
//
// The solution we take here is to split the space-time continuum, and
// keep both versions of the access handy. If the hoisted access is not
@ -542,7 +542,7 @@ void RegisterizerAnalysis::mergeCurrentScopeIntoParent() {
closeAccessIntoScope(pCandidate, parent);
parentAccesses.erase(parentIt);
// the childs access inserted into the parent scope.
// the children access inserted into the parent scope.
closeAccessIntoScope(candidate, parent);
continue;
}
@ -567,7 +567,7 @@ void RegisterizerAnalysis::mergeCurrentScopeIntoParent() {
++it;
}
// Insert the childs closed access into the parent scope.
// Insert the children closed access into the parent scope.
closeAccessIntoScope(candidate, parent);
}

View File

@ -186,7 +186,7 @@ class AccessInfo {
bool firstUsageOverlapped_{false};
// The cost in real ops that this access represents, to enable
// filtering accesses that wont save any loads or stores.
// filtering accesses that won't save any loads or stores.
ExprPtr store_cost_;
ExprPtr load_cost_;