From fc0376e8b162d8fcf4375ae30566139eae7d48ed Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Wed, 9 Jul 2025 13:23:55 +0800 Subject: [PATCH] [BE][2/6] fix typos in test/ (test/test_*.py) (#157636) Pull Request resolved: https://github.com/pytorch/pytorch/pull/157636 Approved by: https://github.com/yewentao256, https://github.com/mlazos ghstack dependencies: #156311, #156609 --- .lintrunner.toml | 1 - test/test_autograd.py | 16 ++++----- test/test_binary_ufuncs.py | 4 +-- test/test_cpp_extensions_jit.py | 10 +++--- test/test_cuda.py | 22 ++++++------ test/test_cuda_multigpu.py | 4 +-- test/test_cuda_nvml_based_avail.py | 2 +- test/test_cuda_primary_ctx.py | 2 +- test/test_custom_ops.py | 4 +-- test/test_dataloader.py | 6 ++-- test/test_datapipe.py | 6 ++-- test/test_decomp.py | 2 +- test/test_dynamic_shapes.py | 6 ++-- test/test_fake_tensor.py | 6 ++-- test/test_functionalization.py | 4 +-- test/test_functionalization_of_rng_ops.py | 4 +-- test/test_fx.py | 6 ++-- test/test_fx_passes.py | 10 +++--- test/test_fx_reinplace_pass.py | 2 +- test/test_indexing.py | 2 +- test/test_jit.py | 14 ++++---- test/test_jit_autocast.py | 2 +- test/test_jit_fuser_te.py | 2 +- test/test_legacy_vmap.py | 2 +- test/test_license.py | 2 +- test/test_linalg.py | 42 +++++++++++------------ test/test_masked.py | 2 +- test/test_matmul_cuda.py | 6 ++-- test/test_meta.py | 2 +- test/test_mkldnn.py | 2 +- test/test_mps.py | 24 ++++++------- test/test_nestedtensor.py | 2 +- test/test_nn.py | 10 +++--- test/test_openreg.py | 4 +-- test/test_ops.py | 8 ++--- test/test_ops_jit.py | 2 +- test/test_overrides.py | 4 +-- test/test_per_overload_api.py | 2 +- test/test_public_bindings.py | 4 +-- test/test_python_dispatch.py | 2 +- test/test_reductions.py | 16 ++++----- test/test_scatter_gather_ops.py | 2 +- test/test_segment_reductions.py | 2 +- test/test_serialization.py | 2 +- test/test_sparse.py | 26 +++++++------- test/test_sparse_csr.py | 14 ++++---- test/test_stateless.py | 4 +-- test/test_sympy_utils.py | 2 +- test/test_tensor_creation_ops.py | 2 +- test/test_tensorexpr.py | 4 +-- test/test_torch.py | 16 ++++----- test/test_transformers.py | 8 ++--- test/test_type_promotion.py | 2 +- test/test_typing.py | 4 +-- test/test_unary_ufuncs.py | 24 ++++++------- test/test_view_ops.py | 2 +- test/test_weak.py | 2 +- 57 files changed, 194 insertions(+), 195 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index 487559a1d9b2..58e40fb3f14a 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -1167,7 +1167,6 @@ exclude_patterns = [ 'aten/src/ATen/native/[a-pA-P]*/**', 'aten/src/ATen/[a-mA-M]*/**', 'test/**', - 'test/test_*', 'test/[a-hA-h]*/**', 'test/distributed/**', 'torch/_*/**', diff --git a/test/test_autograd.py b/test/test_autograd.py index 6c9241192fc0..01929a276f56 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -4129,7 +4129,7 @@ class TestAutograd(TestCase): self.assertIsNone(y.grad_fn) def test_backward_copy(self): - # This tests checks backward engine for a very subtle bug that appreared + # This tests checks backward engine for a very subtle bug that appeared # in one of the initial versions of autograd. Gradients tensors were # simply stored in lists while the function waited for all its gradients # to be computed. 
However, sometimes an output was used multiple times, @@ -4312,7 +4312,7 @@ class TestAutograd(TestCase): ctx.output_var.sum().backward() return ctx.x.grad * grad_output - # Reentrant starts on CPU thread, finishs on GPU thread + # Reentrant starts on CPU thread, finishes on GPU thread x = torch.randn(2, 2, requires_grad=True) out = Reenter.apply(x) out.sum().backward() @@ -10728,7 +10728,7 @@ class TestAutogradForwardMode(TestCase): dual = fwAD.make_dual(foo, tangent) self.assertFalse(tangent_ref.expired()) - # Make sure that the tangent we provided has been re-used as is + # Make sure that the tangent we provided has been reused as is self.assertTrue(fwAD.unpack_dual(dual)[1] is tangent) # Make sure that dual is keeping the tangent alive @@ -11087,7 +11087,7 @@ class TestAutogradForwardMode(TestCase): self.assertEqual( dual_tangent.storage().data_ptr(), bar.storage().data_ptr() ) - # And the tangent is actually re-used as-is so it is still the same Tensor + # And the tangent is actually reused as-is so it is still the same Tensor self.assertIs(dual_tangent, bar) # Ensure we properly share the version counter @@ -11969,19 +11969,19 @@ class TestAutogradDeviceType(TestCase): (new_param**2).sum().backward() return grad_output - # Reentrant starts on GPU thread, finishs on GPU thread + # Reentrant starts on GPU thread, finishes on GPU thread x = torch.randn(2, 2, device=device, requires_grad=True) out = ReentrantFunc.apply(x) out.sum().backward() - # Reentrant starts on CPU thread, finishs on GPU thread + # Reentrant starts on CPU thread, finishes on GPU thread x = torch.randn(2, 2, requires_grad=True) # set ReentrantFunc node to GPU to emit tasks to GPU queue ReentrantFunc._cpu_mode = False out = ReentrantFunc.apply(x) out.sum().backward() - # Reentrant starts on GPU thread, finishs on CPU thread + # Reentrant starts on GPU thread, finishes on CPU thread x = torch.randn(2, 2, device=device, requires_grad=True) # set ReentrantFunc node to CPU to emit tasks to CPU queue ReentrantFunc._cpu_mode = True @@ -13665,7 +13665,7 @@ class TestMultithreadAutograd(TestCase): y = x * x if torch.cuda.device_count() >= 2: # DataParallel is calling the forward in different threads - # without progating TLS, so hooks should not be called here + # without propagating TLS, so hooks should not be called here _self.assertEqual(len(w), 0) else: # DataParallel only uses one thread diff --git a/test/test_binary_ufuncs.py b/test/test_binary_ufuncs.py index 05226def3b43..1c31d5445f91 100644 --- a/test/test_binary_ufuncs.py +++ b/test/test_binary_ufuncs.py @@ -79,7 +79,7 @@ if TEST_SCIPY: class TestBinaryUfuncs(TestCase): # Generic tests for elementwise binary (AKA binary universal (u) functions (funcs)) # TODO: below contiguous tensor results are compared with a variety of noncontiguous results. - # It would be interesting to have the lhs and rhs have different discontiguities. + # It would be interesting to have the lhs and rhs have different discontinuities. # Helper for comparing torch tensors and NumPy arrays # TODO: should this or assertEqual also validate that strides are equal? @@ -2521,7 +2521,7 @@ class TestBinaryUfuncs(TestCase): # Verify Value self.assertEqual(torch_result, expected) # Verify Sign - # Use double copysign to verify the correctnes of 0.0 and -0.0, since + # Use double copysign to verify the correctness of 0.0 and -0.0, since # it always True for self.assertEqual(0.0 == -0.0). So, we use 1 as the # magnitude to verify the sign between torch and numpy results, elementwise. 
# Special case: NaN conversions between FP32 and FP16 is not bitwise diff --git a/test/test_cpp_extensions_jit.py b/test/test_cpp_extensions_jit.py index c7e104963fa6..d671e3f874c9 100644 --- a/test/test_cpp_extensions_jit.py +++ b/test/test_cpp_extensions_jit.py @@ -1031,7 +1031,7 @@ class TestCppExtensionJIT(common.TestCase): t = torch.rand(2).double() cpp_tensor_name = r"CPUDoubleType" - # Without error handling, the warnings cannot be catched + # Without error handling, the warnings cannot be caught warn_mod = torch.utils.cpp_extension.load_inline( name="warn_mod", cpp_sources=[source], @@ -1065,23 +1065,23 @@ class TestCppExtensionJIT(common.TestCase): ) with warnings.catch_warnings(record=True) as w: - # Catched with no error should be detected + # Caught with no error should be detected warn_mod.foo(t, 0) self.assertEqual(len(w), 1) - # Catched with cpp error should also be detected + # Caught with cpp error should also be detected with self.assertRaisesRegex(TypeError, t.type()): warn_mod.foo(t, 1) self.assertEqual(len(w), 2) - # Catched with python error should also be detected + # Caught with python error should also be detected with self.assertRaisesRegex( SystemError, "bad argument to internal function" ): warn_mod.foo(t, 2) self.assertEqual(len(w), 3) - # Catched with pybind error should also be detected + # Caught with pybind error should also be detected # Note that there is no type name translation for pybind errors with self.assertRaisesRegex(KeyError, cpp_tensor_name): warn_mod.foo(t, 3) diff --git a/test/test_cuda.py b/test/test_cuda.py index d8375fe9429b..e190d827fdda 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -795,7 +795,7 @@ print(t.is_pinned()) os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"] ) # this is really just checking that the environment variable is respected during testing - # and not overwritten by another function that doesn't revert it to the intitial value + # and not overwritten by another function that doesn't revert it to the initial value if not skip_tf32_cublas: self.assertFalse(torch.backends.cuda.matmul.allow_tf32) self.assertEqual(torch.get_float32_matmul_precision(), "highest") @@ -1143,7 +1143,7 @@ print(t.is_pinned()) tmp2 = torch.cuda.FloatTensor(t.size()) tmp2.zero_() self.assertNotEqual( - tmp2.data_ptr(), ptr[0], msg="allocation re-used to soon" + tmp2.data_ptr(), ptr[0], msg="allocation reused to soon" ) self.assertEqual(result.tolist(), [1, 2, 3, 4]) @@ -1154,7 +1154,7 @@ print(t.is_pinned()) torch.cuda.current_stream().synchronize() with torch.cuda.stream(stream): tmp3 = torch.cuda.FloatTensor(t.size()) - self.assertEqual(tmp3.data_ptr(), ptr[0], msg="allocation not re-used") + self.assertEqual(tmp3.data_ptr(), ptr[0], msg="allocation not reused") def test_record_stream_on_shifted_view(self): # See issue #27366 @@ -1235,20 +1235,20 @@ print(t.is_pinned()) def test_caching_pinned_memory(self): cycles_per_ms = get_cycles_per_ms() - # check that allocations are re-used after deletion + # check that allocations are reused after deletion t = torch.FloatTensor([1]).pin_memory() ptr = t.data_ptr() del t t = torch.FloatTensor([1]).pin_memory() self.assertEqual(t.data_ptr(), ptr, msg="allocation not reused") - # check that the allocation is not re-used if it's in-use by a copy + # check that the allocation is not reused if it's in-use by a copy gpu_tensor = torch.cuda.FloatTensor([0]) torch.cuda._sleep(int(1000 * cycles_per_ms)) # delay the copy by 1s gpu_tensor.copy_(t, non_blocking=True) del t t = 
torch.FloatTensor([1]).pin_memory() - self.assertNotEqual(t.data_ptr(), ptr, msg="allocation re-used too soon") + self.assertNotEqual(t.data_ptr(), ptr, msg="allocation reused too soon") self.assertEqual(list(gpu_tensor), [1]) def test_caching_allocator_record_stream_oom(self): @@ -1263,7 +1263,7 @@ print(t.is_pinned()) x = torch.empty(40 * 1024 * 1024, device="cuda") with torch.cuda.stream(stream): y += x - # delays re-use of `x` until after all operations in `stream` + # delays reuse of `x` until after all operations in `stream` x.record_stream(stream) del x @@ -2970,7 +2970,7 @@ exit(2) current = postcapture_stats[stat] - precapture_stats[stat] # There will only ever be one expandable segment in each of the small and large pools. The way the - # bookeeping is done in the allocator means that we never increment the number of segments. + # bookkeeping is done in the allocator means that we never increment the number of segments. if self.expandable_segments and "segment" in stat: expected = 0 # These two cases hit an edge case where the PyTorch allocator won't immediately unmap part of an @@ -3011,7 +3011,7 @@ exit(2) current = postdel_stats[stat] - precapture_stats[stat] # There will only ever be one expandable segment in each of the small and large pools. The way the - # bookeeping is done in the allocator means that we never increment the number of segments. + # bookkeeping is done in the allocator means that we never increment the number of segments. if self.expandable_segments and "segment" in stat: expected = 0 # These two cases hit an edge case where the PyTorch allocator won't immediately unmap part of an @@ -3648,7 +3648,7 @@ exit(2) graph.replay() self.assertTrue(torch.all(x == 3.0)) - # Check that graph capture can succeed after reseting. + # Check that graph capture can succeed after resetting. 
graph.reset() # Don't do x[:] = 0.0 because we want to capture a new address @@ -5382,7 +5382,7 @@ class TestMemPool(TestCase): out_2 = torch.randn(nelem_1mb, device="cuda") # pool now should have 2 segments since the CUDACachingAllocator had - # to make a new 2 MB buffer to accomodate out_2 + # to make a new 2 MB buffer to accommodate out_2 self.assertEqual(len(pool.snapshot()), 2) self.assertEqual(len(pool.snapshot()), 2) diff --git a/test/test_cuda_multigpu.py b/test/test_cuda_multigpu.py index a5bc0e7ed0d0..2882b0f58808 100644 --- a/test/test_cuda_multigpu.py +++ b/test/test_cuda_multigpu.py @@ -967,7 +967,7 @@ class TestCudaMultiGPU(TestCase): @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected") def test_caching_pinned_memory_multi_gpu(self): - # checks that the events preventing pinned memory from being re-used + # checks that the events preventing pinned memory from being reused # too early are recorded on the correct GPU cycles_per_ms = get_cycles_per_ms() @@ -982,7 +982,7 @@ class TestCudaMultiGPU(TestCase): del t t = torch.FloatTensor([2]).pin_memory() - self.assertNotEqual(t.data_ptr(), ptr, msg="allocation re-used too soon") + self.assertNotEqual(t.data_ptr(), ptr, msg="allocation reused too soon") with torch.cuda.device(0): gpu_tensor0.copy_(t, non_blocking=True) diff --git a/test/test_cuda_nvml_based_avail.py b/test/test_cuda_nvml_based_avail.py index f890c35e15a1..c47607f4c7ac 100644 --- a/test/test_cuda_nvml_based_avail.py +++ b/test/test_cuda_nvml_based_avail.py @@ -138,7 +138,7 @@ class TestVisibleDeviceParses(TestCase): _transform_uuid_to_ordinals(["GPU-9e8d35e3", "GPU-123", "GPU-47"], uuids), [1], ) - # First ambigous UUID aborts parsing + # First ambiguous UUID aborts parsing self.assertEqual( _transform_uuid_to_ordinals(["GPU-9e8d35e3", "GPU-e", "GPU-47"], uuids), [1] ) diff --git a/test/test_cuda_primary_ctx.py b/test/test_cuda_primary_ctx.py index 9b044458b3e6..284d048e9e08 100644 --- a/test/test_cuda_primary_ctx.py +++ b/test/test_cuda_primary_ctx.py @@ -42,7 +42,7 @@ class TestCudaPrimaryCtx(TestCase): self.assertFalse(torch._C._cuda_hasPrimaryContext(0)) torch.cuda.set_device(0) if _get_torch_cuda_version() >= (12, 0): - # Now after the device was set, the contex should present in CUDA 12. + # Now after the device was set, the context should present in CUDA 12. self.assertTrue(torch._C._cuda_hasPrimaryContext(0)) else: # In CUDA 11 the context should not be created. diff --git a/test/test_custom_ops.py b/test/test_custom_ops.py index f9d231a7df85..bfb8dab25318 100644 --- a/test/test_custom_ops.py +++ b/test/test_custom_ops.py @@ -630,7 +630,7 @@ def _(x): g(x) def test_invalid_schemas(self): - # function schmea validation goes through torchgen, so this is just a + # function schema validation goes through torchgen, so this is just a # basic test. 
with self.assertRaisesRegex(AssertionError, "Invalid function schema: foo"): custom_ops.custom_op(f"{TestCustomOp.test_ns}::foo", "(") @@ -2712,7 +2712,7 @@ class TestCustomOpAPI(TestCase): self.assertEqual(ctx.needs_input_grad, expected) return list(grad.unbind(0)) - # call two applys, do a backward on the first + # call two applies, do a backward on the first def t(): return torch.randn([], requires_grad=True) diff --git a/test/test_dataloader.py b/test/test_dataloader.py index a0745deae987..931c32774004 100644 --- a/test/test_dataloader.py +++ b/test/test_dataloader.py @@ -734,12 +734,12 @@ class SleepDataset(Dataset): def __init__(self, size, sleep_sec): self.size = size self.sleep_sec = sleep_sec - self.sleeped = False + self.slept = False def __getitem__(self, idx): - if not self.sleeped: + if not self.slept: time.sleep(self.sleep_sec) - self.sleeped = True + self.slept = True return idx def __len__(self): diff --git a/test/test_datapipe.py b/test/test_datapipe.py index da335db2eb59..2a57bef2075b 100644 --- a/test/test_datapipe.py +++ b/test/test_datapipe.py @@ -573,7 +573,7 @@ class TestCaptureDataFrame(TestCase): class TestDataFramesPipes(TestCase): """ - Most of test will fail if pandas instaled, but no dill available. + Most of test will fail if pandas installed, but no dill available. Need to rework them to avoid multiple skips. """ @@ -1887,7 +1887,7 @@ class TestFunctionalIterDataPipe(TestCase): with self.assertRaises(ValueError): list(filter_dp) - # Funtional Test: Specify input_col + # Functional Test: Specify input_col tuple_input_ds = dp.iter.IterableWrapper([(d - 1, d, d + 1) for d in range(10)]) # Single input_col @@ -3356,7 +3356,7 @@ class TestSharding(TestCase): with self.assertRaises(Exception): dp.apply_sharding(2, 1, sharding_group=SHARDING_PRIORITIES.DEFAULT) - # Test tud.datapipes.iter.grouping.SHARDING_PRIORITIES for backward compatbility + # Test tud.datapipes.iter.grouping.SHARDING_PRIORITIES for backward compatibility # TODO: Remove this test once tud.datapipes.iter.grouping.SHARDING_PRIORITIES is deprecated def test_sharding_groups_in_legacy_grouping_package(self): with self.assertWarnsRegex( diff --git a/test/test_decomp.py b/test/test_decomp.py index 07dcd8252c5b..5d641e32e422 100644 --- a/test/test_decomp.py +++ b/test/test_decomp.py @@ -854,7 +854,7 @@ def forward(self, scores_1, mask_1, value_1): # de-functionalise the graph, as that would break AoTAutograd # We run the real function *after* the decomposition to make sure that the # decomposition does not modify any of the inputs in-place. If it does - # real_out should be differen than decom_out so we should catch this + # real_out should be different than decom_out so we should catch this real_out_unflat = func(*args, **kwargs) real_out = pytree.tree_leaves(real_out_unflat) diff --git a/test/test_dynamic_shapes.py b/test/test_dynamic_shapes.py index f9fc61af81d4..0f299cd6b6c7 100644 --- a/test/test_dynamic_shapes.py +++ b/test/test_dynamic_shapes.py @@ -3286,7 +3286,7 @@ def forward(self, arg0_1: "i64[1][1]cpu", arg1_1: "Sym(u1)", arg2_1: "i64[u1][1] def test_unbacked_reshape2(self): cnt = CompileCounterWithBackend("inductor") - # This reshape requires a clone when the input is not contiguous and we cant compute strides. + # This reshape requires a clone when the input is not contiguous and we can't compute strides. 
# reshape (u2, u3) -> (u0, u1) def func(x, y): u0, u1 = y.tolist() @@ -3421,7 +3421,7 @@ def forward(self, arg0_1: "i64[2][1]cpu", arg1_1: "Sym(u2)", arg2_1: "Sym(u3)", def test_invalid_view_unbacked_view(self): cnt = CompileCounterWithBackend("inductor") - # This view (u2, u3) -> (u0, u1) cant happen in general unless we know that input is contigous or we have + # This view (u2, u3) -> (u0, u1) can't happen in general unless we know that input is contiguous or we have # hints to to compute strides. def func(x, y): u0, u1 = y.tolist() @@ -3452,7 +3452,7 @@ def forward(self, arg0_1: "i64[2][1]cpu", arg1_1: "Sym(u2)", arg2_1: "Sym(u3)", func(torch.ones(5, 6, 7, 8)) self.assertEqual(cnt.frame_count, 1) - # it can be dynamic in all dimentions except dim=2 + # it can be dynamic in all dimensions except dim=2 func(torch.ones(4, 9, 7, 10)) self.assertEqual(cnt.frame_count, 1) diff --git a/test/test_fake_tensor.py b/test/test_fake_tensor.py index 017e41c114ee..b512920981e8 100644 --- a/test/test_fake_tensor.py +++ b/test/test_fake_tensor.py @@ -97,7 +97,7 @@ class FakeTensorTest(TestCase): @unittest.skipIf(not RUN_CUDA, "requires cuda") def test_cuda_initialized(self): - # doesnt error + # doesn't error with FakeTensorMode(): p = torch.randn(4, 2, requires_grad=True, device="cuda") x = torch.randn(8, 4, device="cuda") @@ -1471,7 +1471,7 @@ class FakeTensorOperatorInvariants(TestCase): with torch._subclasses.CrossRefFakeMode(): Repro()(*args) except MetadataMismatchError as e: - # We expect the cross ref to succed for the first output to fail + # We expect the cross ref to succeed for the first output to fail # for the rng state, see Note [Seed and Offset] self.assertTrue("output[0]" not in str(e)) if self.__class__.__name__.startswith("PropagateRealTensors"): @@ -2327,7 +2327,7 @@ class FakeTensorDispatchCache(TestCase): self.assertEqual(len(backend.fw_graphs), 1) mod = backend.fw_graphs[0] - # Ensure that we see hits everytime + # Ensure that we see hits every time with FakeTensorMode(): x = torch.randn(6, 4) y = torch.randn(6, 4) diff --git a/test/test_functionalization.py b/test/test_functionalization.py index ed74465369e0..65e74297a531 100644 --- a/test/test_functionalization.py +++ b/test/test_functionalization.py @@ -199,7 +199,7 @@ class TestFunctionalization(TestCase): y.set_(x.storage()) return y - # We should probaby get the crossref test to work, + # We should probably get the crossref test to work, # but fixing it for Storage() objects is annoying. 
r = _functionalize(f, reapply_views=True, crossref=False)(torch.ones(2)) self.assertEqual(str(r.device), "cpu") @@ -2318,7 +2318,7 @@ def forward(self, arg0_1): ] ) @unittest.skipIf( - TEST_WITH_TORCHDYNAMO, "dynamo-ing code with proxy + fake doesnt work well" + TEST_WITH_TORCHDYNAMO, "dynamo-ing code with proxy + fake doesn't work well" ) class TestCrossRefFunctionalization(TestFunctionalization): crossref = True diff --git a/test/test_functionalization_of_rng_ops.py b/test/test_functionalization_of_rng_ops.py index 3cc9f2722020..9b4542500d50 100644 --- a/test/test_functionalization_of_rng_ops.py +++ b/test/test_functionalization_of_rng_ops.py @@ -302,7 +302,7 @@ class TestFunctionalizationRngOps(TestCase): fwd_compiler = functools.partial(count_philox_rand, freq=1) bwd_compiler = functools.partial(count_philox_rand, freq=0) aot_fn = aot_function(fn, fwd_compiler, bwd_compiler) - # We cant check accuracy here because rand_like generated different rand numbers than dropout + # We can't check accuracy here because rand_like generated different rand numbers than dropout res = aot_fn(x, y) res.sum().backward() @@ -316,7 +316,7 @@ class TestFunctionalizationRngOps(TestCase): # Ensure the decomp is happening aot_fn = aot_function(fn, functools.partial(count_philox_rand, freq=1)) - # We cant check accuracy here because rand_like generated different rand numbers than dropout + # We can't check accuracy here because rand_like generated different rand numbers than dropout aot_fn(x) diff --git a/test/test_fx.py b/test/test_fx.py index 73cb89b6ae3d..19836147495f 100644 --- a/test/test_fx.py +++ b/test/test_fx.py @@ -908,7 +908,7 @@ class TestFX(JitTestCase): wrapper = WrapperModule(interpreter) # Create a graph that: 1) Takes function arguments 2) Invokes the interpreter - # 3) Returns the speficied return value + # 3) Returns the specified return value # FIXME: The following code could be greatly simplified by symbolic_trace'ing # the wrapper with a Tracer that considers the Wrapper instance a root @@ -2225,8 +2225,8 @@ class TestFX(JitTestCase): foo_scripted = torch.jit.script(Foo()) foo_scripted(Pair(torch.rand(5), torch.rand(5)), torch.rand(5), 3) - fxed = symbolic_trace(Foo()) - fxed_scripted = torch.jit.script(fxed) + fixed = symbolic_trace(Foo()) + fxed_scripted = torch.jit.script(fixed) fxed_scripted(Pair(torch.rand(5), torch.rand(5)), torch.rand(5), 3) def test_fn_type_annotation_empty(self): diff --git a/test/test_fx_passes.py b/test/test_fx_passes.py index 6d85b5faa53f..be22f8e61e50 100644 --- a/test/test_fx_passes.py +++ b/test/test_fx_passes.py @@ -110,7 +110,7 @@ class TestPartitionFunctions: @staticmethod def forward6(a, b, c): - # add should have its own partition, as neither branchs are supported + # add should have its own partition, as neither branches are supported add = a + 1 # left branch relu = add.relu() @@ -283,7 +283,7 @@ class TestFXGraphPasses(JitTestCase): (TestPartitionFunctions.forward15, [['add_1', 'add', 'permute_1', 'view', 'permute_2', 'permute_3', 'permute']], False), (TestPartitionFunctions.forward16, [["permute_1", "add_1", "add"]], True), (TestPartitionFunctions.forward16, [['add_1', 'add', 'permute_1', 'view', 'permute_2', 'permute_3', 'permute']], False), - # should be empty partition, not a partiton with empty nodes + # should be empty partition, not a partition with empty nodes (TestPartitionFunctions.forward18, [], False), ]) def test_partitioner(self, fn, expected_partition, bookend_non_compute_pass): @@ -344,9 +344,9 @@ class 
TestFXGraphPasses(JitTestCase): [['add', 'add_1', 'add_2']], # vertical fusion [['add_2', 'add_3']], # horizontal fusion [['add_3', 'add_4']], - [['add_6', 'add_5']], # arbitray node order - [['add_4', 'add_1', 'add_3', 'add_2']], # arbitray node order - [['add_5', 'add_6'], ['add_1', 'add_2', 'add_3', 'add_4']], # arbitray partition order + [['add_6', 'add_5']], # arbitrary node order + [['add_4', 'add_1', 'add_3', 'add_2']], # arbitrary node order + [['add_5', 'add_6'], ['add_1', 'add_2', 'add_3', 'add_4']], # arbitrary partition order [['add_5', 'linear2']], # includes call_function + call_module node [['add_6', 'relu']], # includes call_function + call_module node [['param', 'add_2']], # includes get_attr + call_module nodes diff --git a/test/test_fx_reinplace_pass.py b/test/test_fx_reinplace_pass.py index 6d7258deb487..5db11af8e47a 100644 --- a/test/test_fx_reinplace_pass.py +++ b/test/test_fx_reinplace_pass.py @@ -43,7 +43,7 @@ def forward(self, x_1): def f(x): a = x.clone() a_view = a.view(-1) - # We shouldn't re-inplace the first add(), because an alias of a is re-used later in the program + # We shouldn't re-inplace the first add(), because an alias of a is reused later in the program b = a.add(1) # noqa: F841 # Second add() is fine to re-inplace diff --git a/test/test_indexing.py b/test/test_indexing.py index 987b3caa8108..fa7de92b9829 100644 --- a/test/test_indexing.py +++ b/test/test_indexing.py @@ -231,7 +231,7 @@ class TestIndexing(TestCase): x[ri([0, 2, 4]),], torch.tensor([5, 4, 3], dtype=dtype, device=device) ) - # Only validates indexing and setting for halfs + # Only validates indexing and setting for Halfs if dtype == torch.half: reference = consec((10,)) validate_indexing(reference) diff --git a/test/test_jit.py b/test/test_jit.py index 16a66687e730..c86fb111bfb8 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -4764,7 +4764,7 @@ a") self.assertIsNot(fun_compiled, fun_compiled_2) self.assertEqual(fun_compiled_2(), 7) - # caching doesnt increase refcounts to function (holds weak reference) + # caching doesn't increase refcounts to function (holds weak reference) self.assertTrue(sys.getrefcount(fun), num_ref_counts) def test_string_ops(self): @@ -7374,7 +7374,7 @@ a") # tensor from empty list is type float in python and annotated type in torchscript if "annotate" in li and "dtype" not in option: continue - # Skip unsigned tensor initializaton for signed values on 3.10 + # Skip unsigned tensor initialization for signed values on 3.10 if sys.version_info[:2] >= (3, 10) and "torch.uint8" in option and "-" in li: continue code = tensor_template.format(list_create=li, tensor_op=op, options=option) @@ -7990,7 +7990,7 @@ dedent """ m += k return m - # use of k tests the pathway where we have to insert unitialized + # use of k tests the pathway where we have to insert uninitialized self.checkScript(test_varexit, (3,)) self.checkScript(test_varexit, (2,)) @@ -10066,7 +10066,7 @@ dedent """ buffer = io.BytesIO() torch.jit.save(cm, buffer) buffer.seek(0) - # when tensor is loaded as constant it isnt specialized + # when tensor is loaded as constant it isn't specialized cm_load = torch.jit.load(buffer) FileCheck().check_not("Float(1, 3)").run(cm_load.forward.graph) @@ -10300,7 +10300,7 @@ dedent """ def test_type_inferred_from_empty_annotation(self): """ - Test that the type inferred from an empty or missing annotation is Torch.Tensor wtih `inferred=true` + Test that the type inferred from an empty or missing annotation is Torch.Tensor with `inferred=true` """ 
@torch.jit.script def fn(x): @@ -15606,7 +15606,7 @@ dedent """ a = hasattr(self, "fee") b = hasattr(self, "foo") c = hasattr(self, "hi") - d = hasattr(self, "nonexistant") + d = hasattr(self, "nonexistent") return (a, b, c, d) def foo(self): @@ -16044,7 +16044,7 @@ EXCLUDE_TYPE_CHECK = { # chunk returns a list in scripting and we don't unpack the list, # Thus it won't be replaced by ConstantChunk and run AD. # It's explicitly checked in test_chunk_constant_script_ad -# Similary for split, it's replaced by split_with_sizes in tracing, +# Similarly for split, it's replaced by split_with_sizes in tracing, # but we don't have AD formula for aten::split(Tensor, int[], int), # an op registered in JIT so AD is not triggered in scripting. EXCLUDE_SCRIPT_AD_CHECK = { diff --git a/test/test_jit_autocast.py b/test/test_jit_autocast.py index 8b9e360ab53a..b3cf4d9bee8f 100644 --- a/test/test_jit_autocast.py +++ b/test/test_jit_autocast.py @@ -319,7 +319,7 @@ class TestAutocast(JitTestCase): # TODO: fix and enable this test? # (we could technically fix this, but is it really worth it?) - @unittest.skipIf(True, "unsuported autocast syntax") + @unittest.skipIf(True, "unsupported autocast syntax") def test_reused_autocast_expr(self): @torch.jit.script def fn(a, b, c, d): diff --git a/test/test_jit_fuser_te.py b/test/test_jit_fuser_te.py index 17c83cc7264e..8d3a8090c67a 100644 --- a/test/test_jit_fuser_te.py +++ b/test/test_jit_fuser_te.py @@ -126,7 +126,7 @@ class TestTEFuser(JitTestCase): super().setUp() self.tensorexpr_options = TensorExprTestOptions() - # note: `self.dynamic_shapes` instatiated in specialization of class + # note: `self.dynamic_shapes` instantiated in specialization of class # defined below fusion_strategy = [("DYNAMIC", 20)] if self.dynamic_shapes else [("STATIC", 20)] diff --git a/test/test_legacy_vmap.py b/test/test_legacy_vmap.py index 882838b6391a..bfd1075b25ed 100644 --- a/test/test_legacy_vmap.py +++ b/test/test_legacy_vmap.py @@ -1679,7 +1679,7 @@ class TestVmapOperatorsLegacy(Namespace.TestVmapBaseLegacy): # Interesting case #2: Batch dim at end of tensor, success cases # view_as_complex requires that the dim with size 2 have stride 1 - # in order for the view to function propertly + # in order for the view to function properly test(op, [get([B0, 2]).transpose(0, 1)], in_dims=1) test(vmap(op, in_dims=1), [get([B0, B1, 2]).movedim(1, 2)]) test(vmap(op, in_dims=2), [get([B0, 3, B1, 2]).movedim(2, 3)]) diff --git a/test/test_license.py b/test/test_license.py index 516cb78f1202..6f289a15bb4e 100644 --- a/test/test_license.py +++ b/test/test_license.py @@ -45,7 +45,7 @@ class TestLicense(TestCase): 'Found too many "torch-*dist-info" directories ' f'in "{site_packages}, expected only one' ) - # setuptools renamed *dist-info/LICENSE to *dist-info/licenses/LICENSE sicne 77.0 + # setuptools renamed *dist-info/LICENSE to *dist-info/licenses/LICENSE since 77.0 license_file = os.path.join(distinfo[0], "licenses", "LICENSE") if not os.path.exists(license_file): license_file = os.path.join(distinfo[0], "LICENSE") diff --git a/test/test_linalg.py b/test/test_linalg.py index 108a5f590079..abbf7d6f6e9e 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -135,7 +135,7 @@ class TestLinalg(TestCase): @contextlib.contextmanager def _tunableop_ctx(self): - # Inialize and then tear down TunableOp + # Initialize and then tear down TunableOp import glob import os self._set_tunableop_defaults() @@ -4261,7 +4261,7 @@ class TestLinalg(TestCase): output = torch.einsum(equation, tensors) 
self.assertEqual(output, torch.tensor(expected_output, dtype=torch.float32, device=device)) - # Test equation variantions + # Test equation variations check(' ', 1, expected_output=1) check(' -> ', 1, expected_output=1) check(' , ', 2, 2, expected_output=4) @@ -4770,7 +4770,7 @@ class TestLinalg(TestCase): with self._tunableop_ctx(): torch.cuda.tunable.set_rotating_buffer_size(0) # Numerical check adds significant overhead, unsure if this is needed - # or if there was a transiet problem at the time. + # or if there was a transient problem at the time. # if dtype is torch.half: # os.environ["PYTORCH_TUNABLEOP_NUMERICAL_CHECK"] = "1" ordinal = torch.cuda.current_device() @@ -5009,7 +5009,7 @@ class TestLinalg(TestCase): torch.cuda.tunable.tune_gemm_in_file(untuned_filename) new_results = len(torch.cuda.tunable.get_results()) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = new_results - ref_results # Rowwise case will have an extra solution @@ -5202,7 +5202,7 @@ class TestLinalg(TestCase): # Validator,ROCBLAS_VERSION,X.Y,Z # Validator,HIPBLASLT_VERSION,X,Y.Z # Validator,ROCM_Version,X,Y.Z - # Validator,GCN_ARCH_NAME, + # Validator,GCN_ARCH_NAME, validator_num_lines = 5 with self._tunableop_ctx(): @@ -5242,7 +5242,7 @@ class TestLinalg(TestCase): B = torch.randn(K, M, device=device, dtype=dtype) C = torch.matmul(A, B) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = len(torch.cuda.tunable.get_results()) # There must be a new tuning result @@ -5270,7 +5270,7 @@ class TestLinalg(TestCase): B = torch.randn(K, M, device=device, dtype=dtype) C = torch.matmul(A, B) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = len(torch.cuda.tunable.get_results()) # Take the difference to calculate the number of results from @@ -5303,7 +5303,7 @@ class TestLinalg(TestCase): B = torch.randn(K, M, device=device, dtype=dtype) C = torch.matmul(A, B) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = len(torch.cuda.tunable.get_results()) # Take the difference to calculate the number of results from @@ -5326,7 +5326,7 @@ class TestLinalg(TestCase): # Take the difference to calculate the number of results from # this test. There should be no change in the number of results - # since tuning is disabe. + # since tuning is disable. self.assertEqual((total_num_results - ref_num_results), 0) @onlyCUDA @@ -5335,7 +5335,7 @@ class TestLinalg(TestCase): # Test that the TunableOp results file is created # and is NOT empty. 
# To test this we create a subprocess and then - # execut a matmul from within the subprocess + # execute a matmul from within the subprocess import os import multiprocessing as mp @@ -5384,7 +5384,7 @@ class TestLinalg(TestCase): torch.nn.functional.linear(X, matA, bias) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = len(torch.cuda.tunable.get_results()) # There must be a new tuning result @@ -5438,7 +5438,7 @@ class TestLinalg(TestCase): torch.cuda.tunable.tune_gemm_in_file(untuned_filename) new_results = len(torch.cuda.tunable.get_results()) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = new_results - ref_results # There must be a new tuning results @@ -5514,7 +5514,7 @@ class TestLinalg(TestCase): scaleB = torch.ones((1, matB.shape[1]), device=device) torch._scaled_mm(matA, matB, scale_a=scaleA, scale_b=scaleB, out_dtype=torch.bfloat16) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = len(torch.cuda.tunable.get_results()) # Rowwise case will have an extra solution @@ -5638,7 +5638,7 @@ class TestLinalg(TestCase): torch.cuda.tunable.tune_gemm_in_file(untuned_filename) new_results = len(torch.cuda.tunable.get_results()) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = new_results - ref_results # There must be a new tuning results @@ -5879,7 +5879,7 @@ class TestLinalg(TestCase): torch.cuda.tunable.tune_gemm_in_file(untuned_filename) new_results = len(torch.cuda.tunable.get_results()) - # This stores total number of cummulative results + # This stores total number of cumulative results total_num_results = new_results - ref_results # There must be a new tuning results @@ -6700,7 +6700,7 @@ class TestLinalg(TestCase): with self.assertRaisesRegex(RuntimeError, "torch.int32 dtype"): torch.lu_unpack(lu_data, lu_pivots.long()) - # check that onces flags are unset, Nones are returned + # check that once flags are unset, Nones are returned p, l, u = torch.lu_unpack(lu_data, lu_pivots, unpack_data=False) self.assertTrue(l.numel() == 0 and u.numel() == 0) p, l, u = torch.lu_unpack(lu_data, lu_pivots, unpack_pivots=False) @@ -6919,7 +6919,7 @@ class TestLinalg(TestCase): lambdas1.append(worker.E[:]) tol = 1e-8 - # tol for scipy lobpcg will be choosed so that the number of + # tol for scipy lobpcg will be chosen so that the number of # iterations will be equal or very close to pytorch lobpcg # (that is around 170-180) @@ -6999,7 +6999,7 @@ scipy_lobpcg | {elapsed_scipy_ms:10.2f} | {elapsed_general_scipy_ms:10.2f} | -(input size: {m:4}, eigenpairs:{k:2}, units: ms per call)- ''') - # Handling of very small tolerence + # Handling of very small tolerance tol = 1e-100 lambdas1 = [] @@ -8025,7 +8025,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2: if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater: # cuBLAS does not guarantee BFloat16 support on SM < 53. # So on PyTorch, we consider BFloat16 support on SM < 53 as - # undefined bahavior + # undefined behavior return batch_sizes = [1, 10] @@ -8138,7 +8138,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2: if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater: # cuBLAS does not guarantee BFloat16 support on SM < 53. 
# So on PyTorch, we consider BFloat16 support on SM < 53 as - # undefined bahavior + # undefined behavior return num_batches = 2 @@ -8212,7 +8212,7 @@ scipy_lobpcg | {eq_err_scipy:10.2e} | {eq_err_general_scipy:10.2e} | {iters2: if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater: # cuBLAS does not guarantee BFloat16 support on SM < 53. # So on PyTorch, we consider BFloat16 support on SM < 53 as - # undefined bahavior + # undefined behavior return num_batches = 10 diff --git a/test/test_masked.py b/test/test_masked.py index c5aee472a9a8..1c6bd42ab763 100644 --- a/test/test_masked.py +++ b/test/test_masked.py @@ -57,7 +57,7 @@ def apply_masked_reduction_along_dim(op, input, *args, **kwargs): [[op([1, 2], *args0, **kwargs, dim=None, keepdim=False)] [op([3, 4, 5], *args0, **kwargs, dim=None, keepdim=False)]] - where args0 is args where dim value is replased with None if + where args0 is args where dim value is replaced with None if present. Using the same example data, if the op is called with dim=(0, 1) diff --git a/test/test_matmul_cuda.py b/test/test_matmul_cuda.py index c7c75cdb7927..cfdea5c203b5 100644 --- a/test/test_matmul_cuda.py +++ b/test/test_matmul_cuda.py @@ -595,7 +595,7 @@ class TestMatmulCuda(TestCase): -2, -1 )[:, :n, :] else: - raise AssertionError(f"Invaild op: {op}") + raise AssertionError(f"Invalid op: {op}") C_ref = f_ref(A, B.transpose(-2, -1), offs=offs) C = f(A, B.transpose(-2, -1), offs=offs) @@ -1284,7 +1284,7 @@ class TestFP8Matmul(TestCase): out_dtype=torch.bfloat16, ) - # Note re.compile is used, not re.escape. This is to accomodate fn vs fnuz type message. + # Note re.compile is used, not re.escape. This is to accommodate fn vs fnuz type message. with self.assertRaisesRegex( RuntimeError, r"Expected b\.dtype\(\) == at::kFloat8_e4m3fnu?z? to be true, but got false\.", @@ -1754,7 +1754,7 @@ class TestFP8Matmul(TestCase): # Testing only _scaled_grouped_mm() with multiple shapes, as # _scaled_mm() already has more combinations of parameters than - # _scaled_grouped_mm(), for supporing more than one inputs layout + # _scaled_grouped_mm(), for supporting more than one inputs layout # combinations. 
@unittest.skipIf(TEST_WITH_ROCM, "ROCm doesn't support CUTLASS") diff --git a/test/test_meta.py b/test/test_meta.py index 22bee196a3fd..b5b7cc812192 100644 --- a/test/test_meta.py +++ b/test/test_meta.py @@ -1502,7 +1502,7 @@ class TestMeta(TestCase): def test_fill__alias_relationship(self): inps = torch.rand(2**52, device='meta') r = torch.ops.aten.fill_(inps, 1.0) - # aten.fill_ returns an aliase + # aten.fill_ returns an alias self.assertEqual(id(inps), id(r)) # aten.fill returns a new tensor diff --git a/test/test_mkldnn.py b/test/test_mkldnn.py index 0f73a71c182a..23788653cc6c 100644 --- a/test/test_mkldnn.py +++ b/test/test_mkldnn.py @@ -492,7 +492,7 @@ class TestMkldnn(TestCase): C = torch.randint(1, 3, (1,)).item() * groups x_shape = (N, C) + input_shapes[dim] data = torch.randn(x_shape, dtype=torch.float32) - # conv: mkldnn tranpose conv fp32 + # conv: mkldnn transpose conv fp32 # conv_ref: thnn transpose conv fp32 conv = conv_module[dim](in_channels=C, out_channels=M, diff --git a/test/test_mps.py b/test/test_mps.py index 77d6540b765c..5c450b067ebe 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -640,7 +640,7 @@ class MPSLeakyReluTest(TestCaseMPS): mps_x = cpu_x.detach().clone().to('mps') if not contiguous and not (0 in shape or len(shape) < 2): - # Tranposing will make the tensor non-contiguous + # Transposing will make the tensor non-contiguous cpu_x = cpu_x.transpose(0, 1) mps_x = mps_x.transpose(0, 1) assert not mps_x.is_contiguous() @@ -940,7 +940,7 @@ class TestMPS(TestCaseMPS): x.requires_grad = True d = torch.cdist(x, y) d.backward(dist_grad) - # Check that the backward passs does not contain invalid + # Check that the backward pass does not contain invalid # values such as nan or inf assert torch.isfinite(x.grad).all() @@ -1195,7 +1195,7 @@ class TestMPS(TestCaseMPS): torch.nn.functional.linear(torch.rand(size, device='mps'), torch.randint(-10, 10, size, dtype=torch.int8, device='mps')) - # Weigths on wrong device + # Weights on wrong device with self.assertRaisesRegex(RuntimeError, "argument weight is on cpu but expected on mps"): torch.nn.functional.linear(torch.rand(size, device='mps'), torch.rand(size, device='cpu')) @@ -6285,7 +6285,7 @@ class TestMPS(TestCaseMPS): x = cpu_x.detach().clone().to('mps') if not contiguous and (0 not in shape and len(shape) >= 2): - # Tranposing will make the tensor non-contiguous + # Transposing will make the tensor non-contiguous cpu_x = cpu_x.transpose(0, 1) x = x.transpose(0, 1) assert not x.is_contiguous() @@ -6441,7 +6441,7 @@ class TestMPS(TestCaseMPS): x = cpu_x.detach().clone().to('mps') if not contiguous and (0 not in shape and len(shape) >= 2): - # Tranposing will make the tensor non-contiguous + # Transposing will make the tensor non-contiguous cpu_x = cpu_x.transpose(0, 1) x = x.transpose(0, 1) assert not x.is_contiguous() @@ -6481,7 +6481,7 @@ class TestMPS(TestCaseMPS): x = cpu_x.detach().clone().to('mps') if not contiguous and (0 not in shape and len(shape) >= 2): - # Tranposing will make the tensor non-contiguous + # Transposing will make the tensor non-contiguous cpu_x = cpu_x.transpose(0, 1) x = x.transpose(0, 1) assert not x.is_contiguous() @@ -7706,13 +7706,13 @@ class TestMPS(TestCaseMPS): # Test exponential @unittest.skip("This does not test anything") def test_exponential(self): - def helper(shape, lamda, dtype=torch.float32): + def helper(shape, lambda_, dtype=torch.float32): mps_out = torch.zeros(shape, device='mps', dtype=dtype) - mps_out.exponential_(lamda) + mps_out.exponential_(lambda_) - 
print(mps_out.to('cpu').float().mean(), 1 / lamda) - print(mps_out.to('cpu').float().std() ** 2, 1 / (lamda**2)) + print(mps_out.to('cpu').float().mean(), 1 / lambda_) + print(mps_out.to('cpu').float().std() ** 2, 1 / (lambda_**2)) for dtype in [torch.float32, torch.float16]: helper([100, 100], 2, dtype) @@ -8179,7 +8179,7 @@ class TestLogical(TestCaseMPS): self.assertEqual(torch.isin(x, 2.0), torch.tensor([False, False, True, False], device="mps")) self.assertEqual(torch.isin(x, 1.0, invert=True), torch.tensor([True, False, True, True], device="mps")) self.assertEqual(torch.isin(x, 8.0), torch.tensor([False, False, False, False], device="mps")) - # Scalar.Tensor varaiant(alaises to Scalar.Scalar), not covered by OpInfo + # Scalar.Tensor variant(alaises to Scalar.Scalar), not covered by OpInfo self.assertEqual(torch.isin(2.0, x), torch.tensor(True, device="mps")) def test_isin_asserts(self): @@ -10437,7 +10437,7 @@ class TestConvolutionMPS(TestCaseMPS): grad_in_cl = torch.empty(1, f, oc, device="mps").transpose(1, 2) grad_in_cl[:] = grad_in - # It does not matter whether grad_in contigous, or channels last, results should equal to each other + # It does not matter whether grad_in contiguous, or channels last, results should equal to each other grad_rc = torch.autograd.grad((out,), (inp, conv.weight, conv.bias), (grad_in,), retain_graph=True) grad_rc_cl = torch.autograd.grad((out,), (inp, conv.weight, conv.bias), (grad_in_cl,), retain_graph=True) diff --git a/test/test_nestedtensor.py b/test/test_nestedtensor.py index f53268cb24d3..55a86a11f8aa 100644 --- a/test/test_nestedtensor.py +++ b/test/test_nestedtensor.py @@ -7190,7 +7190,7 @@ torch.cuda.synchronize() query = torch.rand(bs, d1, d3, device=device) value = torch.rand(30, d2, requires_grad=True, device=device) - # total_length must > than max_length otherwise flash_attn backwark will fail + # total_length must > than max_length otherwise flash_attn backward will fail offsets = torch.tensor([0, 2, 3, 30], device=device) m = mha(use_legacy_api) diff --git a/test/test_nn.py b/test/test_nn.py index b9a56698edc6..0323080728b3 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -2013,7 +2013,7 @@ tensor(..., device='meta', size=(1,), requires_grad=True)""") eval_out0 = wrapped_m(input) # assert eval gives same result as last training iteration self.assertEqual(eval_out0, last_train_out) - # assert doing more iteartion in eval don't change things + # assert doing more iteration in eval don't change things self.assertEqual(eval_out0, wrapped_m(input)) self.assertEqual(last_train_u, m.weight_u) self.assertEqual(last_train_v, m.weight_v) @@ -8911,7 +8911,7 @@ class TestNNDeviceType(NNTestCase): # Should raise error when negative padding results in negative output shape self.assertRaises(RuntimeError, lambda: F.pad(inputs, (-3, -2), mode='circular')) - # assert that relfection padding errors when pad >= input size + # assert that reflection padding errors when pad >= input size expected_err_msg = r"Padding size should be less than the corresponding input dimension" inputs = torch.randn(1, 1, 2, 3, device=device, dtype=dtype) self.assertRaisesRegex(RuntimeError, expected_err_msg, @@ -11018,7 +11018,7 @@ class TestNNDeviceType(NNTestCase): @onlyCUDA @dtypes(torch.double) def test_lstmcell_backward_only_one_output_grad(self, device, dtype): - # checks that undefined gradients doen't hamper the backward + # checks that undefined gradients doesn't hamper the backward # see #11872 l = torch.nn.LSTMCell(2, 3).to(device).to(dtype=dtype) s = 
torch.randn(1, 2, device=device, dtype=dtype, requires_grad=True) @@ -11967,7 +11967,7 @@ class TestNNDeviceType(NNTestCase): def test_softmax_bfloat16(self, device): for dim in [0, 1, 2, 3]: _test_bfloat16_ops(self, torch.nn.Softmax(dim=dim), device, inp_dims=(16, 33, 15, 16), prec=1e-2) - # test softmax with large input value which casues exp() to overflow + # test softmax with large input value which causes exp() to overflow _test_bfloat16_ops(self, torch.nn.Softmax(dim=dim), device, inp_dims=(16, 33, 15, 16), prec=0.05, scale_factor=1000.0) def test_nll_loss_mismatched_batch(self, device): @@ -12298,7 +12298,7 @@ if __name__ == '__main__': input = torch.randn(N, C, *other_dims, device=device, requires_grad=True) target = torch.empty(N, *other_dims, dtype=torch.long, device=device).random_(0, C) - # construct target probablity that should have the same result as label_smoothing + # construct target probability that should have the same result as label_smoothing target_proba = F.one_hot(target, num_classes=C) # Need to put the C dim at index 1. target_proba = target_proba.permute(0, -1, *range(1, target_proba.dim() - 1)) diff --git a/test/test_openreg.py b/test/test_openreg.py index 59e1c4bfac41..1fab8c4261c7 100644 --- a/test/test_openreg.py +++ b/test/test_openreg.py @@ -205,7 +205,7 @@ class TestPrivateUse1(TestCase): class TestOpenReg(TestCase): - """Tests of mimick accelerator named OpenReg based on PrivateUse1""" + """Tests of mimic accelerator named OpenReg based on PrivateUse1""" # Stream & Event def test_stream_synchronize(self): @@ -475,7 +475,7 @@ class TestOpenReg(TestCase): with torch.serialization.skip_data(): torch.save(sd, f) - # Opeartors + # Operators def test_factory(self): x = torch.empty(3, device="openreg") self.assertEqual(x.device.type, "openreg") diff --git a/test/test_ops.py b/test/test_ops.py index f5d848532a13..26f8865b3a00 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -87,7 +87,7 @@ _variant_ops = partial( # Get names of all the operators which have ref in their entry in OpInfo (testing infra) # except for elementwise unary operators (separately implemented in test/test_unary_ufuncs.py), # elementwise binary operators (separately implemented in test_binary_ufuncs.py), -# reduction operations (separately impelemented in test_reductions.py), +# reduction operations (separately implemented in test_reductions.py), # and Spectral Functions (separately implemented for only 1D as of now, in test/test_spectral_ops.py) _ref_test_ops = tuple( filter( @@ -373,7 +373,7 @@ class TestCommon(TestCase): # output_process_fn_grad has a very unfortunate name # We use this function in linalg extensively to postprocess the inputs of functions - # that are not completely well-defined. Think svd and muliplying the singular vectors by -1. + # that are not completely well-defined. Think svd and multiplying the singular vectors by -1. # CPU and CUDA implementations of the SVD can return valid SVDs that are different. # We use this function to compare them. cuda_results = sample.output_process_fn_grad(cuda_results) @@ -580,7 +580,7 @@ class TestCommon(TestCase): # Tests that experimental Python References perform the same computation # as the operators they reference, when operator calls in the torch - # namesapce are remapped to the refs namespace (torch.foo becomes refs.foo). + # namespace are remapped to the refs namespace (torch.foo becomes refs.foo). 
@onlyNativeDeviceTypesAnd(["hpu"]) @ops(python_ref_db) @skipIfTorchInductor("Takes too long for inductor") @@ -759,7 +759,7 @@ class TestCommon(TestCase): else tuple(n_inp) + n_args ) - # Filter the elemnts that are tensors that require grad + # Filter the elements that are tensors that require grad t_input_tensors = [ t for t in t_inputs if isinstance(t, torch.Tensor) and t.requires_grad ] diff --git a/test/test_ops_jit.py b/test/test_ops_jit.py index a9a7ec5339b7..9dfb75cc6a8f 100644 --- a/test/test_ops_jit.py +++ b/test/test_ops_jit.py @@ -188,7 +188,7 @@ class TestJit(JitCommonTestCase): # Note: only runs in float32 because schema isn't affected by dtype, # so running it on all dtypes is would be excessive if dtype == torch.float32: - # TODO: no reason why we cant run this with tracing graph + # TODO: no reason why we can't run this with tracing graph if support_script and op.name != "rsub": check_alias_annotation( name, diff --git a/test/test_overrides.py b/test/test_overrides.py index 8575bb90271c..4db042297f05 100644 --- a/test/test_overrides.py +++ b/test/test_overrides.py @@ -77,7 +77,7 @@ def quux(a): # dictionary are function names in the torch API and the values are # function implementations. Implementations are added to # HANDLED_FUNCTION_DIAGONAL by decorating a python function with -# implements_diagonal. See the overrides immediately below the defintion +# implements_diagonal. See the overrides immediately below the definition # of DiagonalTensor for usage examples. HANDLED_FUNCTIONS_DIAGONAL = {} @@ -133,7 +133,7 @@ class DiagonalTensor: https://numpy.org/devdocs/user/basics.dispatch.html """ # This is defined as a class attribute so that SubDiagonalTensor - # below which subclasses DiagonalTensor can re-use DiagonalTensor's + # below which subclasses DiagonalTensor can reuse DiagonalTensor's # __torch_function__ implementation. handled_functions = HANDLED_FUNCTIONS_DIAGONAL diff --git a/test/test_per_overload_api.py b/test/test_per_overload_api.py index e47c55f284a3..e5cf2aa1d567 100644 --- a/test/test_per_overload_api.py +++ b/test/test_per_overload_api.py @@ -7,7 +7,7 @@ from torch.testing._internal.common_utils import run_tests, TestCase class TestPerOverloadAPI(TestCase): def test_basics_opoverloadpacket(self): - # add is ony used as an example here. It is ok to update the test + # add is only used as an example here. It is ok to update the test # if the semantics of add are modified in the future. 
add_packet = torch.ops.aten.add diff --git a/test/test_public_bindings.py b/test/test_public_bindings.py index 039898cc1600..09bbbcbadcc8 100644 --- a/test/test_public_bindings.py +++ b/test/test_public_bindings.py @@ -512,7 +512,7 @@ class TestPublicBindings(TestCase): "does not have `__all__` defined" ) fix_is_public = ( - f"remove it from the modules's (`{modname}`) `__all__`" + f"remove it from the modules' (`{modname}`) `__all__`" if is_all else f"either define a `__all__` for `{modname}` or add a `_` at the beginning of the name" ) @@ -522,7 +522,7 @@ class TestPublicBindings(TestCase): f"it is not inside the module's (`{modname}`) `__all__`" ) fix_is_public = ( - f"add it from the modules's (`{modname}`) `__all__`" + f"add it from the modules' (`{modname}`) `__all__`" ) if looks_public: why_looks_public = ( diff --git a/test/test_python_dispatch.py b/test/test_python_dispatch.py index 9349612575df..f1a13e3db1a2 100644 --- a/test/test_python_dispatch.py +++ b/test/test_python_dispatch.py @@ -156,7 +156,7 @@ class TestPythonRegistration(TestCase): # New dispatcher call should hit the first callback again self.assertFalse(first_called) a, b = args - # Make a substraction here instead of add ! + # Make a subtraction here instead of add ! c = a - b self.assertTrue(first_called) return c diff --git a/test/test_reductions.py b/test/test_reductions.py index dc59b76c24f0..f0ec8b434535 100644 --- a/test/test_reductions.py +++ b/test/test_reductions.py @@ -735,7 +735,7 @@ class TestReductions(TestCase): res2 = x1.sum(axis=(0, 2), keepdims=True) self.assertEqual(res1, res2) - # TODO: kill this ane replace with common creation ops + # TODO: kill this and replace with common creation ops def _make_tensors(self, shape, val_range=(-100, 100), use_floating=True, use_integral=True, use_complex=False) -> dict[str, list[torch.Tensor]]: float_types = [torch.double, @@ -1629,7 +1629,7 @@ class TestReductions(TestCase): RuntimeError, "only when boundaries tensor dimension is 1"): torch.searchsorted(boundaries, 1) - # incompatiable output tensor's dtype + # incompatible output tensor's dtype def test_output_dtype(dtype, is_int32): output = values_1d.to(dtype) with self.assertRaisesRegex( @@ -2018,7 +2018,7 @@ class TestReductions(TestCase): with self.assertRaisesRegex(RuntimeError, error_msg): op(x, dim=dim) - # TODO: update this test to comapre against NumPy + # TODO: update this test to compare against NumPy @onlyCUDA def test_var(self, device): cpu_tensor = torch.randn(2, 3, 3) @@ -2513,7 +2513,7 @@ class TestReductions(TestCase): k = int((t.numel() - 1) / 2) self.assertEqual(res, t.view(-1).sort()[0][k]) if t.numel() % 2 == 1: - # We can only test agains numpy for odd reductions because numpy + # We can only test against numpy for odd reductions because numpy # returns the mean of the two medians and torch returns the lower self.assertEqual(res.cpu().numpy(), np.median(t_numpy)) for dim in range(t.ndim): @@ -2524,7 +2524,7 @@ class TestReductions(TestCase): self.assertEqual(res[0], (t.sort(dim)[0]).select(dim, k).unsqueeze_(dim)) self.assertEqual(res[0], t.gather(dim, res[1])) if size % 2 == 1: - # We can only test agains numpy for odd reductions because numpy + # We can only test against numpy for odd reductions because numpy # returns the mean of the two medians and torch returns the lower self.assertEqual(res[0].cpu().numpy(), np.median(t_numpy, dim, keepdims=True), exact_dtype=False) @@ -2548,7 +2548,7 @@ class TestReductions(TestCase): k = int((t.numel() - num_nan - 1) / 2) self.assertEqual(res, 
t.view(-1).sort()[0][k]) if (t.numel() - num_nan) % 2 == 1: - # We can only test agains numpy for odd reductions because numpy + # We can only test against numpy for odd reductions because numpy # returns the mean of the two medians and torch returns the lower self.assertEqual(res.item(), numpy_op(t.cpu().numpy())) for dim in range(t.ndim): @@ -2561,7 +2561,7 @@ class TestReductions(TestCase): k = ((size - num_nan - 1) / 2).type(torch.long) self.assertEqual(res[0], (t.sort(dim)[0]).gather(dim, k)) self.assertEqual(res[0], t.gather(dim, res[1])) - # We can only test agains numpy for odd reductions because numpy + # We can only test against numpy for odd reductions because numpy # returns the mean of the two medians and torch returns the lower mask = (size - num_nan) % 2 == 1 res = res[0].masked_select(mask).cpu() @@ -3526,7 +3526,7 @@ as the input tensor excluding its innermost dimension'): # raises an error if no `dim` parameter is specified. This exists separately from tests in # test_tensot_compare_ops_empty because not specifying a `dim` parameter in the former tests does # not throw errors. Also, checking the return type of argmax requires supplying a different dtype - # argument than that for the input tensor. There is also variantion in numpy testing. + # argument than that for the input tensor. There is also variation in numpy testing. def test_tensor_compare_ops_argmax_argmix_kthvalue_dim_empty(self, device): shape = (2, 0, 4) master_input = torch.randn(shape, device=device) diff --git a/test/test_scatter_gather_ops.py b/test/test_scatter_gather_ops.py index 4acff8fab3bd..d2a0e8bd1ccc 100644 --- a/test/test_scatter_gather_ops.py +++ b/test/test_scatter_gather_ops.py @@ -455,7 +455,7 @@ class TestScatterGather(TestCase): helper([50, 8, 7], 100) helper([50, 3, 4, 5], 100) -# Generic Device Test Framework instantation, see +# Generic Device Test Framework instantiation, see # https://github.com/pytorch/pytorch/wiki/Running-and-writing-tests # for details. instantiate_device_type_tests(TestScatterGather, globals()) diff --git a/test/test_segment_reductions.py b/test/test_segment_reductions.py index 9118674c763d..0b269595db21 100644 --- a/test/test_segment_reductions.py +++ b/test/test_segment_reductions.py @@ -558,7 +558,7 @@ class TestSegmentReductions(TestCase): lengths = torch.tensor([0, 2, 3, 0], device=device, dtype=length_type) data = torch.arange(6, dtype=torch.float, device=device) - # test for error on 1-D lenghts + # test for error on 1-D lengths with self.assertRaisesRegex(RuntimeError, "Expected all rows of lengths along axis"): torch._segment_reduce(data, 'sum', lengths=lengths, axis=0, unsafe=False) diff --git a/test/test_serialization.py b/test/test_serialization.py index e92fc4018b0c..3413366608f4 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -746,7 +746,7 @@ class SerializationMixin: 'readinto() stress test') def test_serialization_filelike_uses_readinto(self): - # For maximum effiency, when reading a file-like object, + # For maximum efficiency, when reading a file-like object, # ensure the C API calls readinto instead of read. 
a = torch.randn(5, 4) diff --git a/test/test_sparse.py b/test/test_sparse.py index 8967f89fd916..608b5ef13c1b 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -458,7 +458,7 @@ class TestSparse(TestSparseBase): torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), shape, True)) @dtypes(*floating_and_complex_types_and(torch.float16, torch.bfloat16)) - @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error") + @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") @gradcheck_semantics() def test_to_dense_with_gradcheck(self, device, dtype, gradcheck): @@ -594,7 +594,7 @@ class TestSparse(TestSparseBase): self.assertEqual(torch.empty((3, 0), dtype=dtype, device=device), self.safeToDense(x)) @dtypes(torch.double, torch.cdouble) - @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error") + @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") @gradcheck_semantics() def test_to_dense_hybrid(self, device, dtype, gradcheck): @@ -950,7 +950,7 @@ class TestSparse(TestSparseBase): @coalescedonoff @dtypes(torch.double, torch.cdouble) - @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error") + @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") @gradcheck_semantics() def test_permute(self, device, dtype, coalesced, gradcheck): # trivial checks @@ -1240,7 +1240,7 @@ class TestSparse(TestSparseBase): # NOTE: indices are negative idx_dim_d_range = list(range(-sizes[d], 0)) for idx_len in range(sizes[d], sizes[d] + 1): - # creates all possible valid indices into dim d of lenght idx_len + # creates all possible valid indices into dim d of length idx_len for idx in itertools.product(*itertools.repeat(idx_dim_d_range, idx_len)): t_idx = torch.tensor(idx, dtype=torch.long, device=device) @@ -1619,7 +1619,7 @@ class TestSparse(TestSparseBase): @coalescedonoff @dtypes(torch.double) - @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error") + @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") def test_sparse_mm(self, device, dtype, coalesced): def test_shape(d1, d2, d3, nnz, transposed): if transposed: @@ -1641,7 +1641,7 @@ class TestSparse(TestSparseBase): @coalescedonoff @dtypes(torch.double) - @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error") + @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") @gradcheck_semantics() def test_sparse_mul(self, device, dtype, coalesced, gradcheck): # https://github.com/pytorch/pytorch/issues/79914 @@ -3600,13 +3600,13 @@ class TestSparse(TestSparseBase): @dtypes(torch.double, torch.float) - @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error") + @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") def test_softmax_zero_nnz(self, device, dtype): self._check_zero_nnz_softmax_op(torch.sparse.softmax, 1, device, dtype) self._check_zero_nnz_softmax_op(torch.sparse.softmax, 10, device, dtype) @dtypes(torch.double, torch.float) - @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupport triggers assertion error") + @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") def test_log_softmax_zero_nnz(self, device, dtype): self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 1, device, dtype) 
self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 10, device, dtype) @@ -3985,11 +3985,11 @@ class TestSparse(TestSparseBase): # some normal cases yield (make_diags((1, 5)), make_offsets([0]), (5, 5)) yield (make_diags((3, 3)), make_offsets([-1, 0, 1]), (4, 4)) - # noncontigous diags + # non-contiguous diags yield (make_diags((5, 4), noncontiguous=True), make_offsets([-1, 1, 0, 2, -2]), (5, 5)) - # noncontigous offsets + # non-contiguous offsets yield (make_diags((3, 4)), make_offsets([1, -1, 0, -2, 2])[::2], (5, 5)) - # noncontigous diags + offsets + # non-contiguous diags + offsets yield (make_diags((3, 4), noncontiguous=True), make_offsets([1, -1, 0, -2, 2])[::2], (5, 5)) # correct dimensionality, 2d, 2d , and shapes match, but the number of diagonals is zero yield (make_diags((0, 3)), make_offsets([]), (3, 3)) @@ -4624,7 +4624,7 @@ class TestSparseAny(TestCase): # However, invariants check can be disabled via # constructor's optional argument so that the invalid - # tensor is succesfully constructed: + # tensor is successfully constructed: r = create_invalid_tensor(check_invariants=False) self.assertEqual(r.layout, layout) @@ -4646,7 +4646,7 @@ class TestSparseAny(TestCase): self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled()) self.assertFalse(torch.sparse.check_sparse_tensor_invariants.is_enabled()) - # Test an attempt to re-use an activate context manager instance + # Test an attempt to reuse an activate context manager instance check_ctx2 = torch.sparse.check_sparse_tensor_invariants(True) with check_ctx: self.assertTrue(torch.sparse.check_sparse_tensor_invariants.is_enabled()) diff --git a/test/test_sparse_csr.py b/test/test_sparse_csr.py index 33b4d7da6037..cc313c586a09 100644 --- a/test/test_sparse_csr.py +++ b/test/test_sparse_csr.py @@ -2791,7 +2791,7 @@ class TestSparseCSR(TestCase): raise ValueError("Expected at least one 2D tensor in samples.") for sample in samples: - # We must skip samples of low dimensionality, we can't covert them to sparsed compressed layouts + # We must skip samples of low dimensionality, we can't convert them to sparsed compressed layouts if sample.input.ndim < 2: continue sparse_input = sample.input.to_sparse_csr().requires_grad_(True) @@ -3255,7 +3255,7 @@ class TestSparseCSR(TestCase): # helpers def _check_against_scipy_matrix(pt_matrix, dense, blocksize, **kwargs): - # scipy has no bsc layout, so we check against the bsr layout of the tranposed dense + # scipy has no bsc layout, so we check against the bsr layout of the transposed dense if layout == torch.sparse_bsc: sp_matrix = self._construct_sp_matrix(dense.t(), layout=torch.sparse_bsr, blocksize=blocksize[::-1]) else: @@ -3272,7 +3272,7 @@ class TestSparseCSR(TestCase): self.assertEqual(torch.tensor(sp_matrix.indptr, dtype=torch.int64), compressed_indices_mth(pt_matrix)) self.assertEqual(torch.tensor(sp_matrix.indices, dtype=torch.int64), plain_indices_mth(pt_matrix)) if layout == torch.sparse_bsc: - # we must tranpose the blocks before comparing + # we must transpose the blocks before comparing self.assertEqual(torch.tensor(sp_matrix.data), pt_matrix.values().transpose(-2, -1)) else: self.assertEqual(torch.tensor(sp_matrix.data), pt_matrix.values()) @@ -3371,7 +3371,7 @@ class TestSparseCSR(TestCase): # special cases for batched tensors if batched: - # batched sparse tensors need only have the same number of non-zeros in each batch not nessesarily the + # batched sparse tensors need only have the same number of non-zeros in each batch not necessarily the # same 
sparsity pattern in each batch sparse_shape = sparse_sizes[0] hybrid_shape = hybrid_sizes[0] @@ -3382,7 +3382,7 @@ class TestSparseCSR(TestCase): # number of elements/blocks in each batch (total not nnz) batch_mask_shape = sparse_shape if layout in blocked_layouts: - # if we are blocked the mask is genereated for the block valued elemetns + # if we are blocked the mask is generated for the block valued elements batch_mask_shape = sparse_shape[0] // blocksize[0], sparse_shape[1] // blocksize[1] # random bool vector w/ length equal to max possible nnz for the sparse_shape @@ -3815,7 +3815,7 @@ class TestSparseCompressedTritonKernels(TestCase): input_broadcasted_clone.col_indices(), # For testing `out=` let's make values to have "weird" strides # so that if the kernel modifies values to it's needs, the result - # is being compied into out.values. + # is being copied into out.values. input_broadcasted_clone.values().transpose(-3, -2).contiguous().transpose(-3, -2), layout=input_broadcasted_clone.layout, size=input_broadcasted_clone.shape @@ -3930,7 +3930,7 @@ class TestSparseCompressedTritonKernels(TestCase): try: result = bsr_scatter_mm(bsr, dense, indices_data=indices_data) except triton.compiler.OutOfResources: - # ensure that there was at least one succesful test: + # ensure that there was at least one successful test: assert SPLIT_N < SPLIT_N_list[0] break diff --git a/test/test_stateless.py b/test/test_stateless.py index 983872992e46..d24194ed460e 100644 --- a/test/test_stateless.py +++ b/test/test_stateless.py @@ -210,7 +210,7 @@ class TestStatelessFunctionalAPI(TestCase): prev_buffer = module.buffer.clone() res = functional_call(module, parameters, x, tie_weights=False) self.assertEqual(x, res) - # check that the weights remain unmodified and were correctly accesed + # check that the weights remain unmodified and were correctly accessed cur_weight = module.l1.weight cur_buffer = module.buffer self.assertEqual(cur_weight, prev_weight) @@ -753,7 +753,7 @@ class TestStatelessFunctionalAPI(TestCase): res = torch.func.functional_call(mod, (), x) self.assertEqual(res, mod(x)) - # three dictonaries + # three dictionaries a = ({'l1.weight': torch.ones(1, 1)}, {'l1.bias': torch.ones(1)}, {'buffer': torch.zeros(1)}) res = torch.func.functional_call(mod, a, x) self.assertEqual(res, x + 1) diff --git a/test/test_sympy_utils.py b/test/test_sympy_utils.py index 220ad2c1c2f3..5343e2e0a9fb 100644 --- a/test/test_sympy_utils.py +++ b/test/test_sympy_utils.py @@ -423,7 +423,7 @@ class TestSympyInterp(TestCase): sargs = [sympy.sympify(a) for a in args] sympy_expr = getattr(ReferenceAnalysis, fn)(*symbols) ref_r = getattr(ReferenceAnalysis, fn)(*sargs) - # Yes, I know this is a longwinded way of saying xreplace; the + # Yes, I know this is a long-winded way of saying xreplace; the # point is to test sympy_interp r = sympy_interp( ReferenceAnalysis, dict(zip(symbols, sargs)), sympy_expr diff --git a/test/test_tensor_creation_ops.py b/test/test_tensor_creation_ops.py index 2108b13c0be3..02cb1d31d563 100644 --- a/test/test_tensor_creation_ops.py +++ b/test/test_tensor_creation_ops.py @@ -1531,7 +1531,7 @@ class TestTensorCreation(TestCase): expected = torch.empty(0, 5, dtype=a.dtype, device=device) self.assertEqual(c, expected) - # test empty imput + # test empty input a = torch.empty(0, device=device) c1 = torch.combinations(a) c2 = torch.combinations(a, with_replacement=True) diff --git a/test/test_tensorexpr.py b/test/test_tensorexpr.py index 3872fc1a3213..17d3a58535d6 100644 --- a/test/test_tensorexpr.py 
+++ b/test/test_tensorexpr.py
@@ -695,12 +695,12 @@ class TestTensorExprFuser(BaseTestClass):
             _atol = 2e-3
             _rtol = 1e-5
             if data_type is torch.bfloat16:
-                # Compared to aten logic, NNC coudl save addtional BF16/Fp32 conversion.
+                # Compared to aten logic, NNC could save additional BF16/Fp32 conversion.
                 # Take d = a + b - c as an example, the aten logic is as follows at
                 # operator level:
                 #   tmp = to_bf16(to_fp32(a) + to_fp32(b))
                 #   d = to_bf16(to_fp32(tmp) + to_fp32(c))
-                # But NNC could fuse the compression and remove the redudant conversions.
+                # But NNC could fuse the compression and remove the redundant conversions.
                 # The final statement is as follows
                 # d = to_bf16(to_fp32(a) + to_fp32(b) + to_fp32(c))
                 # Hence, we simulate NNC computation by feeding fp32 tensors and converting
diff --git a/test/test_torch.py b/test/test_torch.py
index d613b8fb4a1d..d171f0d313b4 100644
--- a/test/test_torch.py
+++ b/test/test_torch.py
@@ -1091,7 +1091,7 @@ class TestTorchDeviceType(TestCase):
         small2_expanded = small2.expand(*dims_full)
 
         if small.is_cuda and fn in ['map', 'map2']:
-            # map and map2 are not implementd on CUDA tensors
+            # map and map2 are not implemented on CUDA tensors
             return
 
         if hasattr(large_expanded, fn):
@@ -2677,7 +2677,7 @@ else:
             x.requires_grad = True
             d = torch.cdist(x, y)
             d.backward(dist_grad)
-            # Check that the backward passs does not contain invalid
+            # Check that the backward pass does not contain invalid
             # values such as nan or inf
             assert torch.isfinite(x.grad).all()
 
@@ -2709,7 +2709,7 @@ else:
                                            [0, 0, 0],
                                            [1, 2, 3]]))
 
-        # Check that cummulative sum over a zero length dimension doesn't crash on backprop.
+        # Check that cumulative sum over a zero length dimension doesn't crash on backprop.
         # Also check that cumsum over other dimensions in a tensor with a zero-length
         # dimensiuon also works
         # Also include a basic suite of similar tests for other bases cases.
@@ -2761,7 +2761,7 @@ else:
                                            [0, 0, 0],
                                            [1, 1, 1]]))
 
-        # Check that cummulative prod over a zero length dimension doesn't crash on backprop.
+        # Check that cumulative prod over a zero length dimension doesn't crash on backprop.
         # Also check that cumprod over other dimensions in a tensor with a zero-length
         # dimensiuon also works
         # Also include a basic suite of similar tests for other bases cases.
@@ -3806,7 +3806,7 @@ else:
         # Test for parallel adds with accumulate == True
         low_precision = dtype == torch.half or dtype == torch.bfloat16
         # Less numbers to avoid overflow with low_precision
-        # Grainsize is 3000 for the for_loop to be parallized on CPU
+        # Grainsize is 3000 for the for_loop to be parallelized on CPU
         sizes = ((100,)) if low_precision else ((200,), (3002,))
         # Bfloat16 has a particularly bad performance here
         # This operation is nondeterministic on GPU, so we are generous with the rtol
@@ -7063,7 +7063,7 @@ class TestTorch(TestCase):
             dest.index_add(0, index, source)
 
     def test_linspace_logspace(self):
-        # Ensure the output does not require grad regardless of inputs requiring gard or not.
+        # Ensure the output does not require grad regardless of inputs requiring grad or not.
         # The output of factory functions should not be part of any computational graph.
         start = 0.0
         end = 3.0
@@ -8700,7 +8700,7 @@ tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j],
         self.assertEqual(2 * size, (1, 2, 3, 1, 2, 3))
 
     def test_Size_concat_non_tuple_sequence(self):
-        # check that TypeError get's raised on adding non-tuple sequences.
+        # check that TypeError gets raised on adding non-tuple sequences.
from collections.abc import Sequence class DummySequence(Sequence): @@ -11104,7 +11104,7 @@ def add_neg_dim_tests(): assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim)) -# TODO: these empy classes are temporarily instantiated for XLA compatibility +# TODO: these empty classes are temporarily instantiated for XLA compatibility # once XLA updates their test suite it should be removed class TestViewOps(TestCase): pass diff --git a/test/test_transformers.py b/test/test_transformers.py index e7073d3bcbb2..07715560801f 100644 --- a/test/test_transformers.py +++ b/test/test_transformers.py @@ -98,7 +98,7 @@ def _check_equal( """ Compare test tensor against golden and reference tensors. Golden is the highest precision possible serving as the "ground truth" - Refernce is the same precision as test and should also serve as less precisie ground truth. + Reference is the same precision as test and should also serve as less precisie ground truth. We calcculate the "reference error" by comparing the golden to reference and use this as the measruing stick for the test tensor. @@ -1693,7 +1693,7 @@ class TestSDPAFailureModes(NNTestCase): @onlyCUDA @unittest.skipIf(not PLATFORM_SUPPORTS_FLASH_ATTENTION, "Does not support fused SDPA or pre-SM80 hardware") def test_unaligned_tensors(self, device): - # The alignment is depdent on arch so we specifiy SM80OrLater + # The alignment is dependent on arch so we specify SM80OrLater dtype = torch.float16 size = SdpaShape(2, 2, 8, 5) make_tensor = partial(torch.rand, size, device=device, dtype=dtype) @@ -3042,7 +3042,7 @@ class TestSDPACudaOnly(NNTestCase): # Cast up and compare # Since we are doing the compute on fp16 we have to bump the tolerance - # Bump down the tolearnce for blfoat16 + # Bump down the tolerance for blfoat16 atol = 7e-4 if dtype == torch.float16 else 7e-3 rtol = 7e-4 if dtype == torch.float16 else 7e-3 if TEST_WITH_ROCM: @@ -3525,7 +3525,7 @@ class TestSDPACudaOnly(NNTestCase): query, key, value, is_causal=is_causal, scale=scale, enable_gqa=enable_gqa) else: # Problem: We pad sizes in the composite region of the top level SDPA. But we need the - # Debug mask when have dropout. So I am going to manualy pad up here when testing dropout + # Debug mask when have dropout. So I am going to manually pad up here when testing dropout q_padded, q_og_size = pad_last_dim(query, 8) k_padded, k_og_size = pad_last_dim(key, 8) v_padded, v_og_size = pad_last_dim(value, 8) diff --git a/test/test_type_promotion.py b/test/test_type_promotion.py index 1548b882fa04..88fcdd3a5dca 100644 --- a/test/test_type_promotion.py +++ b/test/test_type_promotion.py @@ -1052,7 +1052,7 @@ class TestTypePromotion(TestCase): torch.cat([x, y], out=out) self.assertEqual(out, expected_out, exact_dtype=True) - # Verfies that unary ops require matching out types + # Verifies that unary ops require matching out types @onlyNativeDeviceTypes @dtypes(*itertools.product((torch.int64, torch.float32, torch.float64, diff --git a/test/test_typing.py b/test/test_typing.py index 6c265526e2cb..f28091fa8d04 100644 --- a/test/test_typing.py +++ b/test/test_typing.py @@ -35,7 +35,7 @@ CACHE_DIR = os.path.join(DATA_DIR, ".mypy_cache") def _key_func(key: str) -> str: - """Split at the first occurance of the ``:`` character. + """Split at the first occurrence of the ``:`` character. Windows drive-letters (*e.g.* ``C:``) are ignored herein. 
""" @@ -135,7 +135,7 @@ def _parse_reveals(file: IO[str]) -> list[str]: comments = "/n".join(comments_array) # Only search for the `{*}` pattern within comments, - # otherwise there is the risk of accidently grabbing dictionaries and sets + # otherwise there is the risk of accidentally grabbing dictionaries and sets key_set = set(re.findall(r"\{(.*?)\}", comments)) kwargs = { k: FORMAT_DICT.get(k, f"") for k in key_set diff --git a/test/test_unary_ufuncs.py b/test/test_unary_ufuncs.py index 8d29c504d878..855bbfd7f251 100644 --- a/test/test_unary_ufuncs.py +++ b/test/test_unary_ufuncs.py @@ -1080,7 +1080,7 @@ class TestUnaryUfuncs(TestCase): def test_silu_complex(self, device, dtype): atol = 1e-6 rtol = 1e-6 - inouts = [ + inp_outs = [ (0.2 + 0.3j, 0.08775215595960617065 + 0.18024823069572448730j), (1e-19 + 1e-18j, 4.99999984132761269448e-20 + 5.00000022906852482872e-19j), (-1.0 + 2.0j, -0.78546208143234252930 + -0.44626939296722412109j), @@ -1088,7 +1088,7 @@ class TestUnaryUfuncs(TestCase): (2.0j, -1.55740761756896972656 + 0.99999988079071044922j), ] - for inp, out in inouts: + for inp, out in inp_outs: res = torch.nn.functional.silu( torch.tensor(inp, dtype=dtype, device=device) ) @@ -1096,7 +1096,7 @@ class TestUnaryUfuncs(TestCase): self.assertEqual(res.real, out.real, atol=atol, rtol=rtol) self.assertEqual(res.imag, out.imag, atol=atol, rtol=rtol) - for inp, out in inouts: + for inp, out in inp_outs: res = torch.nn.functional.silu( torch.tensor(inp, dtype=dtype, device=device), inplace=True ) @@ -1170,7 +1170,7 @@ class TestUnaryUfuncs(TestCase): # Not using numpy's log1p here because by the time of writing this, # np.log1p has precision problems for small complex input values, see here: # https://github.com/numpy/numpy/issues/22609 - inouts = [ + inp_outs = [ (0.2 + 0.3j, 0.21263386770217202 + 0.24497866312686414j), (1e-19 + 1e-18j, 1e-19 + 1e-18j), (1e-18 + 0.1j, 0.00497517 + 0.0996687j), @@ -1184,7 +1184,7 @@ class TestUnaryUfuncs(TestCase): ] # test the extreme values if dtype == torch.complex128: - inouts += [ + inp_outs += [ (-1 + 1e250j, 575.6462732485114 + 1.5707963267948966j), (1e250 + 1j, 575.6462732485114 + 1e-250j), (1e250 + 1e250j, 575.9928468387914 + 0.7853981633974483j), @@ -1193,7 +1193,7 @@ class TestUnaryUfuncs(TestCase): (1e250 + 1e-250j, 575.6462732485114 + 0.0j), ] elif dtype == torch.complex64: - inouts += [ + inp_outs += [ (-1 + 1e30j, 69.07755278982137 + 1.5707963267948966j), (1e30 + 1j, 69.07755278982137 + 1e-30j), (1e30 + 1e30j, 69.42412638010134 + 0.7853981633974483j), @@ -1203,7 +1203,7 @@ class TestUnaryUfuncs(TestCase): ] # test the log1p individually - for inp, out in inouts: + for inp, out in inp_outs: res = torch.log1p(torch.tensor(inp, dtype=dtype, device=device)) self.assertFalse(torch.any(torch.isnan(res))) # setting up atol == 0.0 because some part has very small values @@ -1211,7 +1211,7 @@ class TestUnaryUfuncs(TestCase): self.assertEqual(res.imag, out.imag, atol=0.0, rtol=1e-6) # test the log1p in tensor - inp_lst, out_lst = (list(elmt) for elmt in zip(*inouts)) + inp_lst, out_lst = (list(elmt) for elmt in zip(*inp_outs)) inp_tens = torch.tensor(inp_lst, dtype=dtype, device=device) out_tens = torch.tensor(out_lst, dtype=dtype, device=device) res_tens = torch.log1p(inp_tens) @@ -1292,7 +1292,7 @@ class TestUnaryUfuncs(TestCase): zero_to_large = torch.tensor([0.0, 1.0, 1e3], **tkwargs) small_to_inf = torch.tensor([1e-3, 1.0, float("inf")], **tkwargs) nans = torch.zeros((3,), **tkwargs) + float("nan") - inpouts = [ + inp_outs = [ # (a , x), out 
((zeros, small_to_inf), ones), ((small_to_inf, zeros), zeros), @@ -1302,7 +1302,7 @@ class TestUnaryUfuncs(TestCase): ((infs, infs), nans), ((-small_to_inf, small_to_inf), nans), ] - for inputs, output in inpouts: + for inputs, output in inp_outs: input0, input1 = inputs calc = torch.igamma(input0, input1) if torch.all(torch.isnan(output)): @@ -1321,7 +1321,7 @@ class TestUnaryUfuncs(TestCase): zero_to_large = torch.tensor([0.0, 1.0, 1e3], **tkwargs) small_to_inf = torch.tensor([1e-3, 1.0, float("inf")], **tkwargs) nans = torch.zeros((3,), **tkwargs) + float("nan") - inpouts = [ + inp_outs = [ # (a , x), out ((zeros, small_to_inf), zeros), ((small_to_inf, zeros), ones), @@ -1331,7 +1331,7 @@ class TestUnaryUfuncs(TestCase): ((infs, infs), nans), ((-small_to_inf, small_to_inf), nans), ] - for inputs, output in inpouts: + for inputs, output in inp_outs: input0, input1 = inputs calc = torch.igammac(input0, input1) if torch.all(torch.isnan(output)): diff --git a/test/test_view_ops.py b/test/test_view_ops.py index 2e113a9e044b..5aa30483deba 100644 --- a/test/test_view_ops.py +++ b/test/test_view_ops.py @@ -1955,7 +1955,7 @@ class TestOldViewOps(TestCase): with self.assertRaises(numpy_err, msg=msg): np.array_split(a.cpu().numpy(), sections_or_indices, dim) - # addtional tests for tensor_split with tensor_indices_or_sections + # additional tests for tensor_split with tensor_indices_or_sections with self.assertRaisesRegex( RuntimeError, r"tensor_split expected tensor_indices_or_sections to have dtype of long, but got Float", diff --git a/test/test_weak.py b/test/test_weak.py index 3c53f08f235d..629ed12db326 100644 --- a/test/test_weak.py +++ b/test/test_weak.py @@ -159,7 +159,7 @@ class WeakTest(TestCase): self.assertRaises(KeyError, d.__delitem__, o) self.assertRaises(KeyError, d.__getitem__, o) - # If a key isn't of a weakly referencable type, __getitem__ and + # If a key isn't of a weakly referenceable type, __getitem__ and # __setitem__ raise TypeError. __delitem__ should too. self.assertRaises(TypeError, d.__delitem__, 13) self.assertRaises(TypeError, d.__getitem__, 13)