[ROCm] Enabling several UTs (#161715)

All these UTs are working as is; this change just removes the skips:
- test_p2p_ipc
- test_repros.py: working as is; also added fp8 support
- test_activation_checkpointing.py
- test_content_store.py
- test_cuda_multigpu.py
- test_compute_comm_reordering.py
- test_segment_reductions.py
- test_dataloader.py
- test_math_ops.py
- test_loop_ordering.py
- test_control_flow.py
- distributed_test.py
- test_mem_tracker.py
- test_fsdp_optim_state.py
- test_fully_shard_mixed_precision.py: skipped for ROCm < 7.0 (see the version-gate sketch after this list)
- test_aot_inductor_custom_ops.py
- test_c10d_ops_nccl.py
- test_eager_transforms.py
- test_sparse_csr.py
- test_inductor_collectives.py
- test_fake_tensor.py
- test_cupy_as_tensor.py
- test_cuda.py: enable UTs that are working
- test_matmul_cuda.py: enable UTs that are working
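
For the ROCm 7.0 gate mentioned above, here is a minimal sketch of one way such a version check can be expressed. The helper names (`_rocm_version`, `skip_if_rocm_lt`) are hypothetical and this is not necessarily the decorator the PR uses; it only assumes that `torch.version.hip` is a version string like `"6.4.43482"` on ROCm builds and `None` otherwise.

```python
import unittest

import torch


def _rocm_version():
    # torch.version.hip is a string like "6.4.43482" on ROCm builds, None otherwise.
    hip = torch.version.hip
    if hip is None:
        return None
    major, minor = hip.split(".")[:2]
    return (int(major), int(minor))


def skip_if_rocm_lt(required):
    # Skip only on ROCm builds older than `required`; CUDA builds are unaffected.
    ver = _rocm_version()
    return unittest.skipIf(
        ver is not None and ver < required,
        f"requires ROCm >= {required[0]}.{required[1]}",
    )


class MixedPrecisionExample(unittest.TestCase):
    @skip_if_rocm_lt((7, 0))
    def test_gated(self):
        self.assertTrue(True)
```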

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161715
Approved by: https://github.com/pruthvistony, https://github.com/jeffdaily
Prachi Gupta
2025-09-04 20:42:59 +00:00
committed by PyTorch MergeBot
parent d5b38410b5
commit b9ba612f7a
25 changed files with 24 additions and 82 deletions

@@ -31,7 +31,6 @@ from torch.testing._internal.common_utils import (
     run_tests,
     serialTest,
     skipCUDANonDefaultStreamIf,
-    skipIfRocm,
     TEST_CUDA,
     TestCase,
 )
@@ -777,8 +776,6 @@ class TestCudaMultiGPU(TestCase):
             p2c.get()
             c2p.put(sync_func(self, TestCudaMultiGPU.FIFTY_MIL_CYCLES))
 
-    # Skip the test for ROCm as per https://github.com/pytorch/pytorch/issues/53190
-    @skipIfRocm
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
     def test_stream_event_nogil(self):
         for sync_func in [
@@ -819,7 +816,6 @@ class TestCudaMultiGPU(TestCase):
         self.assertGreater(parent_time + child_time, total_time * 1.3)
 
     # This test is flaky for ROCm, see issue #62602
-    @skipIfRocm
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
     def test_events_wait(self):
         d0 = torch.device("cuda:0")
@@ -888,7 +884,6 @@ class TestCudaMultiGPU(TestCase):
         self.assertTrue(e1.query())
 
     @unittest.skipIf(not TEST_MULTIGPU, "detected only one GPU")
-    @skipIfRocm
     def test_events_multi_gpu_elapsed_time(self):
         d0 = torch.device("cuda:0")
         d1 = torch.device("cuda:1")
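
For context on the hunks above: a `skipIfRocm`-style decorator unconditionally skips a test on ROCm builds, so deleting the decorator re-enables the test there. A rough sketch of the behavior follows; this is an assumed illustration, not PyTorch's exact implementation, and it only relies on `torch.version.hip` being non-`None` on ROCm builds.

```python
import functools
import unittest

import torch


def skip_if_rocm(fn):
    # Sketch of a skipIfRocm-style decorator: skip the wrapped test on ROCm builds.
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        if torch.version.hip is not None:  # non-None only on ROCm builds
            raise unittest.SkipTest("test skipped on ROCm")
        return fn(*args, **kwargs)

    return wrapper
```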