Revert "[CI] Add Compiled DDP and Compiled FSDP2 tests to test_inductor_distributed (#138178)"

This reverts commit 8cb91109061648497ca09d6f1f9b9e13a2f5557e.

Reverted https://github.com/pytorch/pytorch/pull/138178 on behalf of https://github.com/yf225 because https://github.com/pytorch/pytorch/pull/138174 has been reverted, so this needs to be reverted as well ([comment](https://github.com/pytorch/pytorch/pull/138178#issuecomment-2422961292))
Author: PyTorch MergeBot
Date: 2024-10-18 17:51:54 +00:00
Parent: 59158f640c
Commit: ada7a8c217
3 changed files with 4 additions and 15 deletions


@@ -320,7 +320,6 @@ test_inductor_distributed() {
   python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
   python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
   python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
-  python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
@@ -332,7 +331,6 @@ test_inductor_distributed() {
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_compile.py --verbose
   python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose
 
   # this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
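For reference, the two CI entries deleted above use the standard PyTorch test runner and can still be invoked by hand in the same way (commands copied verbatim from the removed lines; a GPU machine with triton is assumed, per the skip conditions in the tests themselves):

python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_compile.py --verbose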


@@ -613,8 +613,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_nested_fully_shard_backend_aot_eager(self):
-        # TODO: fix fwd_fullgraph=False case
-        for fwd_fullgraph in [True]:
+        for fwd_fullgraph in [True, False]:
             self._test_traceable_fsdp(
                 *self._create_nested_fully_shard_factory_fns(
                     fwd_fullgraph=fwd_fullgraph
@@ -626,8 +625,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_nested_fully_shard_backend_aot_eager_decomp_partition(self):
-        # TODO: fix fwd_fullgraph=False case
-        for fwd_fullgraph in [True]:
+        for fwd_fullgraph in [True, False]:
             self._test_traceable_fsdp(
                 *self._create_nested_fully_shard_factory_fns(
                     fwd_fullgraph=fwd_fullgraph
@@ -732,7 +730,6 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
         )
         file_check.run(bwd_code)
 
-    @unittest.skip("TODO: fix fwd_fullgraph=False case")
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_nested_fully_shard_backend_inductor_fullgraph_False(self):
@@ -813,9 +810,8 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_transformer_backend_aot_eager(self):
-        # TODO: fix fwd_fullgraph=False case
         for fwd_fullgraph, all_requires_grad in itertools.product(
-            [True], [True, False]
+            [True, False], [True, False]
         ):
             with self._maybe_add_graph_break_to_sdpa(
                 fwd_fullgraph
@@ -833,9 +829,8 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     # TODO: native_dropout has worse accuracy after decomp, need to figure out why
     @torch._inductor.config.patch(fallback_random=True)
     def test_transformer_backend_aot_eager_decomp_partition(self):
-        # TODO: fix fwd_fullgraph=False case
         for fwd_fullgraph, all_requires_grad in itertools.product(
-            [True], [True, False]
+            [True, False], [True, False]
         ):
             with self._maybe_add_graph_break_to_sdpa(fwd_fullgraph):
                 self._test_traceable_fsdp(
@@ -951,7 +946,6 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
         )
         file_check.run(bwd_code)
 
-    @unittest.skip("TODO: fix fwd_fullgraph=False case")
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     # TODO: native_dropout causes CUDA IMA error, need to figure out why


@@ -385,9 +385,6 @@ class DDP_TP_Test(InductorTestCase):
     def tearDown(self):
         dist.destroy_process_group()
 
-    @unittest.skip(
-        "Temporarily disabled due to SymInt error: `unhashable type: non-nested SymInt`"
-    )
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     @skipIfRocm
     def test_ddp_tp(self):