diff --git a/.lintrunner.toml b/.lintrunner.toml
index edf6d6ffbee8..2833716ba037 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -1159,8 +1159,6 @@ exclude_patterns = [
     # These files are all grandfathered in, feel free to remove from this list
     # as necessary
     'aten/**',
-    'functorch/**',
-    'scripts/**',
     'test/**',
     'torch/**',
 ]
diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp
index 33e1c080dabd..19270d2f9225 100644
--- a/functorch/csrc/dim/dim.cpp
+++ b/functorch/csrc/dim/dim.cpp
@@ -710,7 +710,7 @@ public:
             auto t = Tensor::wrap(run_torch_function(A, delayed_->orig, delayed_->args, true));
             tensor_ = t->tensor(A);
             delayed_.reset();
-            // don't force creation of batch tensor if it wasn't alreay provided.
+            // don't force creation of batch tensor if it wasn't already provided.
             batchtensor_ = t->batchtensor_;
             AT_ASSERT(levels() == t->levels());
         }
@@ -1739,7 +1739,7 @@ static mpy::object dot(Arena& A, TensorInfo lhs, TensorInfo rhs, Slice<DimEntry>
     if (lr_dims.dims.size() != sum.size()) {
         for (auto & d : sum) {
             if (!lhs.levels.contains(d) && !rhs.levels.contains(d)) {
-                mpy::raise_error(DimensionBindError(), "summing over non-existant dimension %S", d.dim().ptr());
+                mpy::raise_error(DimensionBindError(), "summing over non-existent dimension %S", d.dim().ptr());
             }
         }
     }
@@ -2206,7 +2206,7 @@ mpy::object index(Arena& A, mpy::handle self, mpy::handle dims, mpy::handle indi
         self_info.tensor = A.autorelease(rearranged->reshape(at::IntArrayRef(new_sizes.begin(), new_sizes.end())));
 
         self_info.levels = reshape_levels; // note: we are using the first level in a flattened group to represent the group for the rest of the op
-                                           // we need to be careful not to rely the dimensions size because it doesnt match the size of the whole group
+                                           // we need to be careful not to rely on the dimension's size because it doesn't match the size of the whole group
     }
     bool has_dimpacks = false;
     for (auto idx : indices_list) {
@@ -2219,7 +2219,7 @@ mpy::object index(Arena& A, mpy::handle self, mpy::handle dims, mpy::handle indi
     return invoke_getitem(A, info);
 }
 
-// true -- the indices were flattend out of a tuple, list or sequence...
+// true -- the indices were flattened out of a tuple, list or sequence...
 
 Slice<mpy::handle> slice_from_sequence(Arena& A, mpy::handle value) {
     if (mpy::tuple_view::check(value)) {
@@ -2539,7 +2539,7 @@ IndexingInfo getsetitem_flat(Arena& A, TensorInfo self_info, Slice<mpy::handle>
              }
         } else if (Dim::check_exact(inp)) {
             auto d = Dim::unchecked_wrap(inp);
-            // dimesions used once are just binding operations
+            // dimensions used once are just binding operations
             if (1 == seen_dims_nuses[*seen_dims.index(d)]) {
                 flat_inputs[i] = no_slice;
                 result_levels.append(A, d);
@@ -2798,7 +2798,7 @@ PyObject* py_split(PyObject *_,
         if (!dim.ptr()) {
             dim = A.autorelease(mpy::from_int(0));
         }
-        mpy::raise_error(PyExc_TypeError, "tensor does not comtain dimension %R", dim.ptr());
+        mpy::raise_error(PyExc_TypeError, "tensor does not contain dimension %R", dim.ptr());
     }
     Slice<int64_t> indices;
 
diff --git a/functorch/csrc/dim/python_variable_simple.h b/functorch/csrc/dim/python_variable_simple.h
index fbd5cfd82815..d8c22ca312e3 100644
--- a/functorch/csrc/dim/python_variable_simple.h
+++ b/functorch/csrc/dim/python_variable_simple.h
@@ -6,7 +6,7 @@
 
 #pragma once
 // note: pytorch's python variable simple includes pybind which conflicts with minpybind
-// so this file just reproduces the minimial API needed to extract Tensors from python objects.
+// so this file just reproduces the minimal API needed to extract Tensors from python objects.
 
 #include <torch/csrc/python_headers.h>
 #include <ATen/core/Tensor.h>
diff --git a/functorch/dim/README.md b/functorch/dim/README.md
index 74c25d949c0b..517930cb844b 100644
--- a/functorch/dim/README.md
+++ b/functorch/dim/README.md
@@ -5,7 +5,7 @@ Named Tensors using First-class Dimensions in PyTorch
 
 _An implementation of [named tensors](https://namedtensor.github.io) with the functionality of [einsum](http://einops.rocks]http://einops.rocks) , batching ([vmap](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap), [xmap](https://jax.readthedocs.io/en/latest/notebooks/xmap_tutorial.html)), and tensor indexing by adding dimension objects to PyTorch_.
 
-The tensor input to a resnet might have the shape [8, 3, 224, 224] but informally we think of those dimensions as 'batch', 'channel', 'width', and 'height'. Eventhough 'width' and 'height' have the same _size_ we still think of them as separate dimensions, and if we have two _different_ images, we think of both as sharing the _same_ 'channel' dimension.
+The tensor input to a resnet might have the shape [8, 3, 224, 224] but informally we think of those dimensions as 'batch', 'channel', 'width', and 'height'. Even though 'width' and 'height' have the same _size_ we still think of them as separate dimensions, and if we have two _different_ images, we think of both as sharing the _same_ 'channel' dimension.
 
 Named tensors gives these dimensions names. [PyTorch's current implementation](https://pytorch.org/docs/stable/named_tensor.html) uses strings to name dimensions. Instead, this library introduces a Python object, a `Dim`, to represent the concept. By expanding the semantics of tensors with dim objects, in addition to naming dimensions, we can get behavior equivalent to batching transforms (xmap, vmap), einops-style rearrangement, and loop-style tensor indexing.
 
@@ -751,7 +751,7 @@ In this way, first-class dims are a way of adapting the nicer syntax of these ar
 
 Performance Expectations
 ========================
-First-class dimensions are not a compiler. They provide syntax for existing PyTorch operations such as advanced indexing that is easier to read and write. For large sized tensors, the performance of any statements including them will be the same as using the already existing operations. An important exception is the pattern matching of products and summation, where performance will be improved by issuing to a matrix-multiply kernel. The C++ implementation of dimensions adds a small overhead of around 2us on top of PyTorch's normal overhead of 8us to each function that uses them. In the future, the implementation can encorporate more fusion optimization to further improve performance of this style of code.
+First-class dimensions are not a compiler. They provide syntax for existing PyTorch operations such as advanced indexing that is easier to read and write. For large sized tensors, the performance of any statements including them will be the same as using the already existing operations. An important exception is the pattern matching of products and summation, where performance will be improved by issuing to a matrix-multiply kernel. The C++ implementation of dimensions adds a small overhead of around 2us on top of PyTorch's normal overhead of 8us to each function that uses them. In the future, the implementation can incorporate more fusion optimization to further improve performance of this style of code.
 
 
 ## License
diff --git a/functorch/dim/__init__.py b/functorch/dim/__init__.py
index 691b1b984f8d..f52d417d2ba2 100644
--- a/functorch/dim/__init__.py
+++ b/functorch/dim/__init__.py
@@ -58,7 +58,7 @@ TensorLike = (_Tensor, torch.Tensor)
 
 
 class Dim(_C.Dim, _Tensor):
-    # note that _C.Dim comes before tensor because we want the Dim API for things like size to take precendence.
+    # note that _C.Dim comes before tensor because we want the Dim API for things like size to take precedence.
     # Tensor defines format, but we want to print Dims with special formatting
     __format__ = object.__format__
 
diff --git a/functorch/dim/reference.py b/functorch/dim/reference.py
index 5c6178c0981c..fd934011d823 100644
--- a/functorch/dim/reference.py
+++ b/functorch/dim/reference.py
@@ -507,7 +507,7 @@ def t__getitem__(self, input):
     for i in reversed(dim_packs):
         input[i : i + 1] = input[i]
 
-    # currenty:
+    # currently:
     # input is flat, containing either Dim, or Tensor, or something valid for standard indexing
     # self may have first-class dims as well.
 
@@ -515,7 +515,7 @@ def t__getitem__(self, input):
     # drop the first class dims from self, they just become direct indices of their positions
 
     # figure out the dimensions of the indexing tensors: union of all the dims in the tensors in the index.
-    # these dimensions will appear and need to be bound at the first place tensor occures
+    # these dimensions will appear and need to be bound at the first place a tensor occurs
 
     if isinstance(self, _Tensor):
         ptensor_self, levels = self._tensor, list(self._levels)
diff --git a/functorch/examples/ensembling/parallel_train.py b/functorch/examples/ensembling/parallel_train.py
index a674a24c738d..0a9abddc9cb5 100644
--- a/functorch/examples/ensembling/parallel_train.py
+++ b/functorch/examples/ensembling/parallel_train.py
@@ -138,7 +138,7 @@ step6()
 # Step 7: Now, the flaw with step 6 is that we were training on the same exact
 # data. This can lead to all of the models in the ensemble overfitting in the
 # same way. The solution that http://willwhitney.com/parallel-training-jax.html
-# applies is to randomly subset the data in a way that the models do not recieve
+# applies is to randomly subset the data in a way that the models do not receive
 # exactly the same data in each training step!
 # Because the goal of this doc is to show that we can use eager-mode vmap to
 # achieve similar things as JAX, the rest of this is left as an exercise to the reader.
diff --git a/functorch/examples/lennard_jones/lennard_jones.py b/functorch/examples/lennard_jones/lennard_jones.py
index 30a50c14a7f7..7d8a6be445ab 100644
--- a/functorch/examples/lennard_jones/lennard_jones.py
+++ b/functorch/examples/lennard_jones/lennard_jones.py
@@ -1,4 +1,4 @@
-# This example was adapated from https://github.com/muhrin/milad
+# This example was adapted from https://github.com/muhrin/milad
-# It is licensed under the GLPv3 license. You can find a copy of it
+# It is licensed under the GPLv3 license. You can find a copy of it
 # here: https://www.gnu.org/licenses/gpl-3.0.en.html .
 
diff --git a/functorch/notebooks/_src/plot_jacobians_and_hessians.py b/functorch/notebooks/_src/plot_jacobians_and_hessians.py
index 3faeaa9a1675..cab6a0d989ed 100644
--- a/functorch/notebooks/_src/plot_jacobians_and_hessians.py
+++ b/functorch/notebooks/_src/plot_jacobians_and_hessians.py
@@ -100,7 +100,7 @@ ft_jac_weight, ft_jac_bias = jacrev(predict, argnums=(0, 1))(weight, bias, x)
 #   vjp and vmap transforms.
 # - jacfwd uses forward-mode AD. It is implemented as a composition of our
 #   jvp and vmap transforms.
-# jacfwd and jacrev can be subsituted for each other and have different
+# jacfwd and jacrev can be substituted for each other and have different
 # performance characteristics.
 #
 # As a general rule of thumb, if you're computing the jacobian of an R^N -> R^M
diff --git a/functorch/notebooks/jacobians_hessians.ipynb b/functorch/notebooks/jacobians_hessians.ipynb
index 5b986a592b72..4acf2ec609ff 100644
--- a/functorch/notebooks/jacobians_hessians.ipynb
+++ b/functorch/notebooks/jacobians_hessians.ipynb
@@ -350,7 +350,7 @@
     {
       "cell_type": "markdown",
       "source": [
-        "Furthemore, it’s pretty easy to flip the problem around and say we want to compute Jacobians of the parameters to our model (weight, bias) instead of the input."
+        "Furthermore, it’s pretty easy to flip the problem around and say we want to compute Jacobians of the parameters to our model (weight, bias) instead of the input."
       ],
       "metadata": {
         "id": "EQAB99EQflUJ"
diff --git a/functorch/notebooks/per_sample_grads.ipynb b/functorch/notebooks/per_sample_grads.ipynb
index a34c80d07ac4..e2317351f7eb 100644
--- a/functorch/notebooks/per_sample_grads.ipynb
+++ b/functorch/notebooks/per_sample_grads.ipynb
@@ -123,7 +123,7 @@
         "predictions = model(data) # move the entire mini-batch through the model\n",
         "\n",
         "loss = loss_fn(predictions, targets)\n",
-        "loss.backward() # back propogate the 'average' gradient of this mini-batch"
+        "loss.backward() # back propagate the 'average' gradient of this mini-batch"
       ],
       "metadata": {
         "id": "WYjMx8QTUvRu"
diff --git a/scripts/build_android.sh b/scripts/build_android.sh
index de0bed7c26d4..43f11b86828d 100755
--- a/scripts/build_android.sh
+++ b/scripts/build_android.sh
@@ -157,7 +157,7 @@ if [ -n "${USE_VULKAN}" ]; then
   fi
 fi
 
-# Use-specified CMake arguments go last to allow overridding defaults
+# User-specified CMake arguments go last to allow overriding defaults
 CMAKE_ARGS+=($@)
 
 # Patch pocketfft (as Android does not have aligned_alloc even if compiled with c++17
diff --git a/scripts/build_mobile.sh b/scripts/build_mobile.sh
index 06cae0dd41a3..7b1995a61ebc 100755
--- a/scripts/build_mobile.sh
+++ b/scripts/build_mobile.sh
@@ -80,7 +80,7 @@ if [ "${VERBOSE:-}" == '1' ]; then
   CMAKE_ARGS+=("-DCMAKE_VERBOSE_MAKEFILE=1")
 fi
 
-# Use-specified CMake arguments go last to allow overridding defaults
+# User-specified CMake arguments go last to allow overriding defaults
 CMAKE_ARGS+=("$@")
 
 # Now, actually build the Android target.
diff --git a/scripts/jit/log_extract.py b/scripts/jit/log_extract.py
index 95d882b461d4..60aeaab92fc8 100644
--- a/scripts/jit/log_extract.py
+++ b/scripts/jit/log_extract.py
@@ -95,7 +95,7 @@ def run():
         "--no-nnc-dynamic",
         dest="nnc_dynamic",
         action="store_false",
-        help="DONT't benchmark nnc with dynamic shapes",
+        help="don't benchmark nnc with dynamic shapes",
     )
     parser.set_defaults(nnc_dynamic=False)
 
diff --git a/scripts/release_notes/apply_categories.py b/scripts/release_notes/apply_categories.py
index 786b1a95908b..9711737fc653 100644
--- a/scripts/release_notes/apply_categories.py
+++ b/scripts/release_notes/apply_categories.py
@@ -1,4 +1,4 @@
-# Quick scipt to apply categorized items to the
+# Quick script to apply categorized items to the
 # base commitlist . Useful if you are refactoring any code
 # but want to keep the previous data on categories
 
diff --git a/scripts/release_notes/classifier.py b/scripts/release_notes/classifier.py
index c64bad818e4e..a517ea7e77da 100644
--- a/scripts/release_notes/classifier.py
+++ b/scripts/release_notes/classifier.py
@@ -156,9 +156,9 @@ class CommitClassifier(nn.Module):
         elif isinstance(most_likely_index, torch.Tensor):
             return [self.categories[i] for i in most_likely_index]
 
-    def get_most_likely_category_name(self, inpt):
+    def get_most_likely_category_name(self, input):
         # Input will be a dict with title and author keys
-        logits = self.forward(inpt)
+        logits = self.forward(input)
         most_likely_index = torch.argmax(logits, dim=1)
         return self.convert_index_to_category_name(most_likely_index)
 
@@ -264,9 +264,9 @@ def generate_batch(batch):
 
 
 def train_step(batch, model, optimizer, loss):
-    inpt, targets = batch
+    input, targets = batch
     optimizer.zero_grad()
-    output = model(inpt)
+    output = model(input)
     l = loss(output, targets)
     l.backward()
     optimizer.step()
@@ -275,8 +275,8 @@ def train_step(batch, model, optimizer, loss):
 
 @torch.no_grad()
 def eval_step(batch, model, loss):
-    inpt, targets = batch
-    output = model(inpt)
+    input, targets = batch
+    output = model(input)
     l = loss(output, targets)
     return l
 
diff --git a/tools/linter/dictionary.txt b/tools/linter/dictionary.txt
index d825aec23c04..cdb8d4571239 100644
--- a/tools/linter/dictionary.txt
+++ b/tools/linter/dictionary.txt
@@ -1,4 +1,7 @@
 coo
+Din
+Dout
+dOut
 fro
 froms
 hsa