[BE][6/6] fix typos in test/ (test/distributed/) (#157640)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/157640
Approved by: https://github.com/yewentao256, https://github.com/malfet
Author: Xuehai Pan
Date: 2025-07-11 15:04:47 +08:00
Committed by: PyTorch MergeBot
Parent: 4283d96bcd
Commit: 0d17029fea
28 changed files with 63 additions and 54 deletions

View File

@ -1169,7 +1169,6 @@ exclude_patterns = [
'aten/src/ATen/[a-mA-M]*/**',
'test/**',
'test/[a-hA-h]*/**',
'test/distributed/**',
'torch/_*/**',
'torch/distributed/tensor/**',
]

View File

@ -69,7 +69,7 @@ class ReplicateStateDictTest(MultiProcessTestCase):
def test_replicate_non_root_multiple_save_load(self):
"""
Tests tha replicate() on multiple submodules matches
Tests that replicate() on multiple submodules matches
local module state_dict.
"""
self._init_pg()

View File

@ -1733,7 +1733,7 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
self.assertEqual(remote_device_before.rank(), remote_device_after.rank())
self.assertEqual(str(remote_device_after.device()), "cpu")
# ensure metdata also get changed to CPU
# ensure metadata also gets changed to CPU
metas = new_st.metadata().shards_metadata
for meta in metas:
self.assertEqual(str(meta.placement.device()), "cpu")
@ -1764,7 +1764,7 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
self.assertEqual(remote_device_before.rank(), remote_device_after.rank())
self.assertEqual(str(remote_device_after.device()), "cpu")
# ensure metdata also get changed to CPU
# ensure metadata also gets changed to CPU
metas = new_st.metadata().shards_metadata
for meta in metas:
self.assertEqual(str(meta.placement.device()), "cpu")
@ -1820,7 +1820,7 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
self.assertEqual(str(remote_device_before.device().type), "cpu")
self.assertEqual(str(remote_device_after.device().type), "cuda")
# ensure metdata also get changed to GPU
# ensure metadata also gets changed to GPU
metas = new_st_gpu.metadata().shards_metadata
for meta in metas:
self.assertEqual(str(meta.placement.device().type), "cuda")

View File

@ -129,7 +129,7 @@ class TestTrackerFullyShard1DTrainingCore(FSDPTest):
@skip_if_lt_x_gpu(2)
def test_tracker_non_root_forward_backward(self):
"""
Tests tracker accracy when running forward/backward through a non-root.
Tests tracker accuracy when running forward/backward through a non-root.
"""
debug = False
dev = torch.device(torch.cuda.current_device())

View File

@ -211,7 +211,7 @@ class TestSACILP(TestCase):
class TestOptimalCheckpointingPolicy(TestCase):
# tests are adpated from tests in xformers
# tests are adapted from tests in xformers
# https://github.com/facebookresearch/xformers/blob/c6c0ac31f1b08542a0bc27278c6ed10f825f6963/tests/test_checkpoint.py#L222
def setUp(self):
super().setUp()

View File

@ -72,7 +72,7 @@ class TestFSDPWithEP(DTensorTestBase, VerifyStateDictMixin):
mesh_fsdp_tp = init_device_mesh(
self.device_type, (2, 4), mesh_dim_names=("dp", "tp")
)
# TODO: we are using an internal API atm. Change to a publich API once it is ready.
# TODO: we are using an internal API atm. Change to a public API once it is ready.
mesh_fsdp_ep = _mesh_resources.create_child_mesh(mesh_fsdp_tp, ("dp",))
del _mesh_resources.child_to_parent_mapping[mesh_fsdp_ep]
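
Aside: the TODO above asks for a public API. One public route that newer PyTorch releases provide is name-based sub-mesh slicing on DeviceMesh; the test itself does not use it, so the following is only a hedged sketch, assuming 8 ranks launched the usual torchrun way.

    from torch.distributed.device_mesh import init_device_mesh

    # Build a 2x4 mesh with named dims, then take the "dp" sub-mesh by name
    # instead of reaching into the private _mesh_resources helpers.
    mesh_fsdp_tp = init_device_mesh("cuda", (2, 4), mesh_dim_names=("dp", "tp"))
    mesh_dp = mesh_fsdp_tp["dp"]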

View File

@ -109,7 +109,7 @@ class TestStateDict(DTensorTestBase, VerifyStateDictMixin):
for d_optim in _dist_optim:
d_optim.step()
# We need to ensure gradients don't exist, this the invarient of using DSD.
# We need to ensure gradients don't exist; this is the invariant of using DSD.
optim.zero_grad()
# Get the state_dict, and compare the result
@ -135,7 +135,7 @@ class TestStateDict(DTensorTestBase, VerifyStateDictMixin):
# We won't be able to load the partial state_dict back.
return
# Since we already have the state_dict saved before, no need to call DCP.
# We can directly load them back. This asser is to ensure that optimizer
# We can directly load them back. This assert is to ensure that optimizer
# state storages are initialized.
# self.assertEqual(len(curr_dist_osd[STATE]), len(dist_osd[STATE]))
set_model_state_dict(

View File

@ -140,7 +140,17 @@ class RendezvousParametersTest(TestCase):
self.assertFalse(params.get_as_bool("dummy_param"))
def test_get_as_bool_raises_error_if_value_is_invalid(self) -> None:
for value in ["01", "Flse", "Ture", "g", "4", "_", "truefalse", 2, -1]:
for value in [
"01",
"Flse", # codespell:ignore
"Ture", # codespell:ignore
"g",
"4",
"_",
"truefalse",
2,
-1,
]:
with self.subTest(value=value):
self._kwargs["dummy_param"] = value
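
The misspelled strings above are deliberate invalid inputs for the boolean parser; the inline codespell directives keep the spell checker, which now covers test/distributed/ after the lintrunner exclusion was dropped in the first hunk, from flagging them. The same pattern in isolation, with a hypothetical variable name:

    # Deliberate misspellings kept as negative test fixtures; the inline
    # comments tell codespell to skip only these tokens, not the whole file.
    invalid_bool_strings = [
        "Flse",  # codespell:ignore
        "Ture",  # codespell:ignore
        "truefalse",
    ]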

View File

@ -71,9 +71,9 @@ class WorkerServerTest(TestCase):
self.assertEqual(resp.status, 200)
self.assertIn("ping", json.loads(resp.data))
resp = pool.request("POST", "/handler/nonexistant")
resp = pool.request("POST", "/handler/nonexistent")
self.assertEqual(resp.status, 404)
self.assertIn(b"Handler nonexistant not found:", resp.data)
self.assertIn(b"Handler nonexistent not found:", resp.data)
@requires_cuda
def test_dump_nccl_trace_pickle(self) -> None:
@ -207,8 +207,8 @@ class WorkerServerTest(TestCase):
def test_get_handler_nonexistant(self) -> None:
from torch._C._distributed_c10d import _get_handler
with self.assertRaisesRegex(ValueError, "Failed to find handler nonexistant"):
_get_handler("nonexistant")
with self.assertRaisesRegex(ValueError, "Failed to find handler nonexistent"):
_get_handler("nonexistent")
def test_get_handler_names(self) -> None:
from torch._C._distributed_c10d import _get_handler_names

View File

@ -158,7 +158,7 @@ class TestFSDPMemory(FSDPTest):
output = cmp(results, expected)
self.assertEqual(output, "")
@unittest.skipIf(TEST_HPU, "Memory will be differnt for CUDA and HPU, skipping")
@unittest.skipIf(TEST_HPU, "Memory will be different for CUDA and HPU, skipping")
@skip_if_lt_x_gpu(2)
@parametrize("ckpt", ["no_ckpt", "ckpt"])
def test_fsdp_memory(self, ckpt):

View File

@ -45,7 +45,7 @@ class ScheduleVShaped(PipelineScheduleMulti):
)
# Go through one microbatch
# Note(whc) - it might be easier to work with thes schedules by writing them as a list of
# Note(whc) - it might be easier to work with these schedules by writing them as a list of
# ["0F0", ...] and then parsing them in the test infra to turn them into actions.
self.pipeline_order = {
0: [

View File

@ -22,7 +22,7 @@ from torch.testing._internal.distributed.rpc_utils import (
# On CircleCI these tests are already run on CPU jobs, thus to save resources do
# not run them on GPU jobs, since thet wouldn't provide additional test signal.
# not run them on GPU jobs, since they wouldn't provide additional test signal.
if not (IS_CI and torch.cuda.is_available()):
globals().update(
generate_tests(

View File

@ -23,7 +23,7 @@ from torch.testing._internal.distributed.rpc_utils import (
# On CircleCI these tests are already run on CPU jobs, thus to save resources do
# not run them on GPU jobs, since thet wouldn't provide additional test signal.
# not run them on GPU jobs, since they wouldn't provide additional test signal.
if not (IS_CI and torch.cuda.is_available()):
globals().update(
generate_tests(

View File

@ -90,7 +90,7 @@ class TestLocalMap(DTensorTestBase):
) # row-wisely sharded W tensor
# Test 1: use the function returned from calling local_map
# get the function wrapped with DTensor/Tensor convertion
# get the function wrapped with DTensor/Tensor conversion
# mm_allreduce_forward is a function that applies to Tensors with manual collective
# local_mm_allreduce_forward is the function that does the same but applies to
# DTensors' `_local_tensor`.

View File

@ -85,7 +85,7 @@ class TensorParallelTest(DTensorTestBase):
with torch.no_grad():
tp_res = tp_model(*inputs)
self.assertEqual(res, tp_res)
# Expect all_gather to be inserted to distributed sharded fc resutls
# Expect all_gather to be inserted to distributed sharded fc results
self.assert_has_c10d_ops(
tp_exported_program.graph_module,
{

View File

@ -438,7 +438,7 @@ class DTensorTest(DTensorTestBase):
self.assertEqual(type(out_view), AsyncCollectiveTensor)
self.assertFalse(out.completed)
# Use the daa, requiring a sync
# Use the data, requiring a sync
ref = torch.ones((4, 2), device=self.device_type) + 1
ref = ref.view(-1)
out_data = out_view + 1

View File

@ -220,7 +220,7 @@ def forward(self, b_parametrizations_buffer_original0, x):
group1 = x.get_group(mesh_dim=1)
return size, coord, group0, group1
# Cant be fullgraph=True because ProcessGroup is not reconstructible in dynamo
# Can't be fullgraph=True because ProcessGroup is not reconstructible in dynamo
compiled_fn = torch.compile(backend="aot_eager")(fn)
mesh = DeviceMesh(self.device_type, torch.arange(self.world_size).unsqueeze(1))

View File

@ -193,7 +193,7 @@ class TestEmbeddingOp(DTensorTestBase):
from torch.distributed.tensor._ops._embedding_ops import _MaskPartial
# case 1: two embeddings with the same shape, thus sharing the underying _MaskPartial
# case 1: two embeddings with the same shape, thus sharing the underlying _MaskPartial
# and MaskBuffer, because of cache hit from sharding propagation
emb1 = torch.nn.Embedding(10, 23, device=self.device_type)

View File

@ -65,7 +65,7 @@ class TestEinsumDims(TestCase):
self.assertEqual(edims.lhs_out_only_dims, ["c"])
self.assertEqual(edims.rhs_out_only_dims, [])
equation = "abd,bf->abfd"
equation = "abd,bf->abfd" # codespell:ignore
input_dims, output_dim = EinsumDims.parse_equation(equation)
edims = EinsumDims.parse_dims(input_dims, output_dim)
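
For context, the equation kept verbatim above (hence the codespell ignore) has a b dimension that appears in both operands and in the output, so nothing is summed away. A quick sanity check with plain torch.einsum, independent of the EinsumDims helper:

    import torch

    a = torch.randn(2, 3, 5)  # dims: a, b, d
    b = torch.randn(3, 4)     # dims: b, f
    out = torch.einsum("abd,bf->abfd", a, b)
    # "b" is shared by both inputs and kept in the output, so it behaves as a
    # batch dimension rather than a contraction.
    print(out.shape)  # torch.Size([2, 3, 4, 5])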

View File

@ -635,7 +635,7 @@ class MultiDimRedistributeTest(DTensorTestBase):
dt = distribute_tensor(full_tensor, device_mesh, repl_inputs)
if repl_inputs != inputs:
# create a new DTensor reinterpreting some of the replicated entires as "Partial"
# create a new DTensor reinterpreting some of the replicated entries as "Partial"
dt = DTensor.from_local(
dt.to_local(), device_mesh, inputs, run_check=False
)

View File

@ -150,7 +150,7 @@ class DTensorXLAIntegrationTest(TestCase):
def shard_params(mod_name, mod, mesh):
shard_spec = [Shard(0)]
# annoate fc1 and fc2
# annotate fc1 and fc2
if isinstance(mod, nn.Linear):
for _, param in mod.named_parameters():
# annotate the parameter tensors directly

View File

@ -601,7 +601,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase):
def _helper_test_extra_cuda_context_by_nvml(self):
"""
A helper for `test_extra_cuda_context`, if pynvml is avaiable.
A helper for `test_extra_cuda_context`, if pynvml is available.
pynvml provides python bindings for NVIDIA NVML functionalities.
Here we are interested in: nvmlDeviceGetComputeRunningProcesses
"""
@ -634,7 +634,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase):
def _helper_test_extra_cuda_context_by_memory(self):
"""
A helper for `test_extra_cuda_context`, if pynvml is NOT avaiable.
A helper for `test_extra_cuda_context`, if pynvml is NOT available.
If extra context is created, it would manifest into device 0's memory usage.
"""
device = torch.device(f"cuda:{self.rank:d}")
@ -1112,7 +1112,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase):
os.environ["TORCH_NCCL_NONBLOCKING_TIMEOUT"] = "100"
store = c10d.FileStore(self.file_name, self.world_size)
device = torch.device(f"cuda:{self.rank}")
# bound device to triger eager init mode
# bound device to trigger eager init mode
pg = self._create_process_group_nccl(store, self.opts(), device_id=device)
backend = pg._get_backend(torch.device(device))
self.assertEqual(backend.comm_split_count(), 0)
@ -2995,7 +2995,7 @@ class NcclErrorHandlingTest(MultiProcessTestCase):
time.sleep(4)
self.assertEqual(process_group.get_error(), ErrorType.REMOTE_ERROR)
# Mimicing all ranks sensing the timeout, abort
# Mimicking all ranks sensing the timeout, abort
process_group.abort()
if prev_nccl_async_error_handling is not None:
@ -4291,7 +4291,7 @@ class NCCLTraceTestBase(MultiProcessTestCase):
def _join_processes(self, fn):
# We need to patch sys.exit() as skip_if will use sys.exit() and
# the exit code from the this process will not be catched.
# the exit code from this process will not be caught.
with mock.patch("sys.exit"):
fn()
super()._join_processes(fn)
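
The "eager init" comment refers to a general mechanism: binding the process group to a specific device at creation time lets the NCCL communicator be built immediately instead of lazily on the first collective. The test goes through an internal helper; a hedged sketch with the public API, assuming torchrun-style environment variables and a PyTorch recent enough to accept device_id:

    import os

    import torch
    import torch.distributed as dist

    # Passing device_id binds the group to one device up front, enabling
    # eager NCCL communicator initialization.
    device = torch.device(f"cuda:{int(os.environ.get('LOCAL_RANK', 0))}")
    dist.init_process_group("nccl", device_id=device)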

View File

@ -1814,7 +1814,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase):
Note: comptime prints the guards before the time they get installed or not installed, so in both cases
(skip or no skip) the same guards get printed. The difference is that in the skip case, they show up
with a special 'guard source' which will cuase them to not be installed. So all we check for is the expected
with a special 'guard source' which will cause them to not be installed. So all we check for is the expected
guard source 'local_fsdp_module'.
"""
global GUARDS_FILE
@ -1871,7 +1871,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase):
def test_fsdp_skip_register_attr_or_module(self):
"""
ensure FSDP module is not registered as attrbutes
ensure the FSDP module is not registered as an attribute
in the fx graph
see `not source.guard_source().is_fsdp_module()`
before calling `register_attr_or_module`

View File

@ -824,7 +824,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
out = compiled(inputs, **self.get_world_trs())
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: Make sure we are not unneccessarily copying the outputs of
# NOTE: Make sure we are not unnecessarily copying the outputs of
# wait_tensors before they are returned from the graph.
(
FileCheck()
@ -891,7 +891,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: Make sure we are not unneccessarily copying the outputs of
# NOTE: Make sure we are not unnecessarily copying the outputs of
# wait_tensors before they are returned from the graph.
(
FileCheck()
@ -1356,7 +1356,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: Make sure we are not unneccessarily copying the outputs of
# NOTE: Make sure we are not unnecessarily copying the outputs of
# wait_tensors before they are returned from the graph.
(
FileCheck()
@ -1403,7 +1403,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
# We want to make sure no unneccessary copy is made.
# We want to make sure no unnecessary copy is made.
(
FileCheck()
.check("buf0 = empty_strided")
@ -1474,7 +1474,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
# We want to make sure no unneccessary copy is made.
# We want to make sure no unnecessary copy is made.
(
FileCheck()
.check("buf0 = empty_strided")
@ -1548,7 +1548,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, *inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
# We want to make sure no unneccessary copy is made.
# We want to make sure no unnecessary copy is made.
(FileCheck().check("all_gather_into_tensor_out").run(code))
out = compiled(*inputs, **self.get_world_trs())
correct = func(*inputs, **self.get_world_trs())
@ -1598,7 +1598,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, *inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
# We want to make sure no unneccessary copy is made.
# We want to make sure no unnecessary copy is made.
(
FileCheck()
.check_count(
@ -1689,7 +1689,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, *inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
# We want to make sure no unneccessary copy is made.
# We want to make sure no unnecessary copy is made.
(
FileCheck()
.check_count(
@ -1785,7 +1785,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
# We want to make sure no unneccessary copy is made.
# We want to make sure no unnecessary copy is made.
(
FileCheck()
.check("all_gather")
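
All of these hunks guard the same property with FileCheck. A self-contained illustration of the pattern, run against a hand-written stand-in rather than real Inductor output:

    from torch.testing import FileCheck

    generated = """
    buf0 = empty_strided((8, 8), (8, 1))
    torch.ops._c10d_functional.wait_tensor.default(buf0)
    return (buf0, )
    """

    # Assert that the wait_tensor output is returned directly, with no copy in
    # between -- the "no unnecessary copy" property the tests above check for.
    (
        FileCheck()
        .check("buf0 = empty_strided")
        .check("wait_tensor")
        .check_not("copy_")
        .check("return (buf0")
        .run(generated)
    )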

View File

@ -232,7 +232,7 @@ class NVSHMEMSymmetricMemoryTest(MultiProcContinousTest):
)
out = symm_mem.empty(max_out_numel, dtype=dtype, device=self.device).fill_(-1)
# 3 rows: input splits, output splits, output offsets
# Initiallizing all values to -1 to check if they are updated
# Initializing all values to -1 to check if they are updated
in_out_splits = symm_mem.empty(
(3, nsplits), dtype=torch.int64, device=self.device
).fill_(-1)

View File

@ -376,7 +376,7 @@ if not TEST_WITH_DEV_DBG_ASAN:
):
self._create_wrapper_pg(with_new_group=True)
# nothing to assert, isinstance(pg, _ProcessGroupWrapper)
# should never be invoked since it is preceeded by
# should never be invoked since it is preceded by
# _GLOO_AVAILABLE check, this test will fail on
# an unexpected NameError if not.

View File

@ -837,9 +837,9 @@ class RendezvousTCPTest(TestCase):
# not respected, it will take much longer to timeout.
start = time.time()
with self.assertRaisesRegex(
DistStoreError, "wait timeout after 100ms, keys: /nonexistant key"
DistStoreError, "wait timeout after 100ms, keys: /nonexistent key"
):
store0.get("nonexistant key")
store0.get("nonexistent key")
end = time.time()
time_diff = end - start
@ -1066,7 +1066,7 @@ class TimeoutTest(TestCase):
wait_for_workers=False,
)
ths = []
threads = []
for i in range(2):
t = threading.Thread(
target=run,
@ -1076,16 +1076,16 @@ class TimeoutTest(TestCase):
),
)
t.start()
ths.append(t)
threads.append(t)
def handler(a, b):
pass
signal.signal(signal.SIGUSR1, handler)
time.sleep(1)
signal.pthread_kill(ths[1].ident, signal.SIGUSR1)
signal.pthread_kill(threads[1].ident, signal.SIGUSR1)
for t in ths:
for t in threads:
t.join()
self.assertTrue(rank_res[0], "rank0")
self.assertTrue(rank_res[1], "rank1")
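
The first hunk in this file relies on the store-level timeout propagating to get(). A minimal single-process sketch of that behaviour; the host and port are placeholders, and older releases raise RuntimeError rather than DistStoreError:

    from datetime import timedelta

    import torch.distributed as dist

    store = dist.TCPStore(
        "127.0.0.1", 29500, world_size=1, is_master=True,
        timeout=timedelta(milliseconds=100),
    )
    try:
        store.get("nonexistent key")  # nothing ever sets this key
    except Exception as exc:  # DistStoreError on recent PyTorch
        print(type(exc).__name__, exc)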

View File

@ -664,7 +664,7 @@ class SymmMemNegativeTest(MultiProcessTestCase):
# These timeout tests are skipped on ROCm because timeout calls trap(), which
# is handled differently inside hip runtime. It collects gpu coredump and causes
# the linux kernel to create a core dump of the host application. The funcitonality
# the linux kernel to create a core dump of the host application. The functionality
# is there, meaning timeout is happening correctly. However, there isn't a nice way
# to test it as the current executing thread will coredump and exit.
@skipIfRocm
@ -690,7 +690,7 @@ class SymmMemNegativeTest(MultiProcessTestCase):
# These timeout tests are skipped on ROCm because timeout calls trap(), which
# is handled differently inside hip runtime. It collects gpu coredump and causes
# the linux kernel to create a core dump of the host application. The funcitonality
# the linux kernel to create a core dump of the host application. The functionality
# is there, meaning timeout is happening correctly. However, there isn't a nice way
# to test it as the current executing thread will coredump and exit.
@skipIfRocm
@ -719,7 +719,7 @@ class SymmMemNegativeTest(MultiProcessTestCase):
# These timeout tests are skipped on ROCm because timeout calls trap(), which
# is handled differently inside hip runtime. It collects gpu coredump and causes
# the linux kernel to create a core dump of the host application. The funcitonality
# the linux kernel to create a core dump of the host application. The functionality
# is there, meaning timeout is happening correctly. However, there isn't a nice way
# to test it as the current executing thread will coredump and exit.
@skipIfRocm