[ROCm] Enabling several UTs (#161715)

All these UTs are working as is; this change just removes the skips:
- test_p2p_ipc
- test_repros.py: working as is; also added fp8 support
- test_activation_checkpointing.py
- test_content_store.py
- test_cuda_multigpu.py
- test_compute_comm_reordering.py
- test_segment_reductions.py
- test_dataloader.py
- test_math_ops.py
- test_loop_ordering.py
- test_control_flow.py
- distributed_test.py
- test_mem_tracker.py
- test_fsdp_optim_state.py
- test_fully_shard_mixed_precision.py: skipped for ROCm < 7.0 (see the version-gate sketch after this list)
- test_aot_inductor_custom_ops.py
- test_c10d_ops_nccl.py
- test_eager_transforms.py
- test_sparse_csr.py
- test_inductor_collectives.py
- test_fake_tensor.py
- test_cupy_as_tensor.py
- test_cuda.py: enable UTs that are working
- test_matmul_cuda.py: enable UTs that are working
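
For the ROCm 7.0 gate mentioned above, here is a minimal sketch of one way such a version check can be expressed. The helper names (`_rocm_version`, `skip_if_rocm_lt`) are hypothetical and this is not necessarily the decorator the PR uses; it only assumes that `torch.version.hip` is a version string like `"6.4.43482"` on ROCm builds and `None` otherwise.

```python
import unittest

import torch


def _rocm_version():
    # torch.version.hip is a string like "6.4.43482" on ROCm builds, None otherwise.
    hip = torch.version.hip
    if hip is None:
        return None
    major, minor = hip.split(".")[:2]
    return (int(major), int(minor))


def skip_if_rocm_lt(required):
    # Skip only on ROCm builds older than `required`; CUDA builds are unaffected.
    ver = _rocm_version()
    return unittest.skipIf(
        ver is not None and ver < required,
        f"requires ROCm >= {required[0]}.{required[1]}",
    )


class MixedPrecisionExample(unittest.TestCase):
    @skip_if_rocm_lt((7, 0))
    def test_gated(self):
        self.assertTrue(True)
```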

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161715
Approved by: https://github.com/pruthvistony, https://github.com/jeffdaily
Prachi Gupta
2025-09-04 20:42:59 +00:00
committed by PyTorch MergeBot
parent d5b38410b5
commit b9ba612f7a
25 changed files with 24 additions and 82 deletions

@@ -31,7 +31,6 @@ from torch.testing._internal.common_utils import (
     run_tests,
     serialTest,
     skipCUDANonDefaultStreamIf,
-    skipIfRocm,
     TEST_CUDA,
     TestCase,
 )
@@ -777,8 +776,6 @@ class TestCudaMultiGPU(TestCase):
             p2c.get()
             c2p.put(sync_func(self, TestCudaMultiGPU.FIFTY_MIL_CYCLES))
 
-    # Skip the test for ROCm as per https://github.com/pytorch/pytorch/issues/53190
-    @skipIfRocm
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
     def test_stream_event_nogil(self):
         for sync_func in [
@@ -819,7 +816,6 @@ class TestCudaMultiGPU(TestCase):
         self.assertGreater(parent_time + child_time, total_time * 1.3)
 
     # This test is flaky for ROCm, see issue #62602
-    @skipIfRocm
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
     def test_events_wait(self):
         d0 = torch.device("cuda:0")
@@ -888,7 +884,6 @@ class TestCudaMultiGPU(TestCase):
         self.assertTrue(e1.query())
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
-    @skipIfRocm
     def test_events_multi_gpu_elapsed_time(self):
         d0 = torch.device("cuda:0")
         d1 = torch.device("cuda:1")
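
For context on the hunks above: a `skipIfRocm`-style decorator unconditionally skips a test on ROCm builds, so deleting the decorator re-enables the test there. A rough sketch of the behavior follows; this is an assumed illustration, not PyTorch's exact implementation, and it only relies on `torch.version.hip` being non-`None` on ROCm builds.

```python
import functools
import unittest

import torch


def skip_if_rocm(fn):
    # Sketch of a skipIfRocm-style decorator: skip the wrapped test on ROCm builds.
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        if torch.version.hip is not None:  # non-None only on ROCm builds
            raise unittest.SkipTest("test skipped on ROCm")
        return fn(*args, **kwargs)

    return wrapper
```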