Revert "[CI] Add Compiled DDP and Compiled FSDP2 tests to test_inductor_distributed (#138178)"
This reverts commit 8cb91109061648497ca09d6f1f9b9e13a2f5557e. Reverted https://github.com/pytorch/pytorch/pull/138178 on behalf of https://github.com/yf225: since https://github.com/pytorch/pytorch/pull/138174 has been reverted, this change needs to be reverted as well ([comment](https://github.com/pytorch/pytorch/pull/138178#issuecomment-2422961292))
@@ -320,7 +320,6 @@ test_inductor_distributed() {
   python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
   python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
   python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
-  python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
@@ -332,7 +331,6 @@ test_inductor_distributed() {
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_compile.py --verbose
   python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose

   # this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
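The lines above are shell commands inside the CI's test_inductor_distributed function. As a purely illustrative sketch, the same run_test.py entry point can also be driven from Python; this assumes the working directory is a PyTorch checkout with the distributed test prerequisites available:

    import subprocess
    import sys

    # Run one of the suites that remains in test_inductor_distributed after the revert.
    # The command mirrors the shell lines in the hunk above.
    subprocess.run(
        [
            sys.executable,
            "test/run_test.py",
            "-i",
            "distributed/_composable/fsdp/test_fully_shard_comm.py",
            "--verbose",
        ],
        check=True,
    )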
@@ -613,8 +613,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_nested_fully_shard_backend_aot_eager(self):
-        # TODO: fix fwd_fullgraph=False case
-        for fwd_fullgraph in [True]:
+        for fwd_fullgraph in [True, False]:
             self._test_traceable_fsdp(
                 *self._create_nested_fully_shard_factory_fns(
                     fwd_fullgraph=fwd_fullgraph
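The effect of this hunk is that the nested fully_shard test sweeps the fwd_fullgraph=False case again instead of only the fullgraph case. A standalone sketch of the restored pattern, with run_case as a hypothetical stand-in for self._test_traceable_fsdp:

    def run_case(fwd_fullgraph: bool) -> None:
        # Hypothetical stand-in: the real test builds FSDP2 model factories and
        # compiles the forward with fullgraph=fwd_fullgraph.
        print(f"running traceable-FSDP case with fwd_fullgraph={fwd_fullgraph}")

    # After the revert, both values are exercised instead of only True.
    for fwd_fullgraph in [True, False]:
        run_case(fwd_fullgraph)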
@@ -626,8 +625,7 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_nested_fully_shard_backend_aot_eager_decomp_partition(self):
-        # TODO: fix fwd_fullgraph=False case
-        for fwd_fullgraph in [True]:
+        for fwd_fullgraph in [True, False]:
             self._test_traceable_fsdp(
                 *self._create_nested_fully_shard_factory_fns(
                     fwd_fullgraph=fwd_fullgraph
@@ -732,7 +730,6 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
         )
         file_check.run(bwd_code)

-    @unittest.skip("TODO: fix fwd_fullgraph=False case")
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_nested_fully_shard_backend_inductor_fullgraph_False(self):
@@ -813,9 +810,8 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     @skipIfRocm
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     def test_transformer_backend_aot_eager(self):
-        # TODO: fix fwd_fullgraph=False case
         for fwd_fullgraph, all_requires_grad in itertools.product(
-            [True], [True, False]
+            [True, False], [True, False]
         ):
             with self._maybe_add_graph_break_to_sdpa(
                 fwd_fullgraph
@@ -833,9 +829,8 @@ val.shape: {[node.meta['val'].shape for node in aliased_graph_inputs]},
     # TODO: native_dropout has worse accuracy after decomp, need to figure out why
     @torch._inductor.config.patch(fallback_random=True)
     def test_transformer_backend_aot_eager_decomp_partition(self):
-        # TODO: fix fwd_fullgraph=False case
         for fwd_fullgraph, all_requires_grad in itertools.product(
-            [True], [True, False]
+            [True, False], [True, False]
         ):
             with self._maybe_add_graph_break_to_sdpa(fwd_fullgraph):
                 self._test_traceable_fsdp(
|
||||
)
|
||||
file_check.run(bwd_code)
|
||||
|
||||
@unittest.skip("TODO: fix fwd_fullgraph=False case")
|
||||
@skipIfRocm
|
||||
@unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
|
||||
# TODO: native_dropout causes CUDA IMA error, need to figure out why
|
||||
|
@@ -385,9 +385,6 @@ class DDP_TP_Test(InductorTestCase):
     def tearDown(self):
         dist.destroy_process_group()

-    @unittest.skip(
-        "Temporarily disabled due to SymInt error: `unhashable type: non-nested SymInt`"
-    )
     @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
     @skipIfRocm
     def test_ddp_tp(self):
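This hunk drops the unconditional unittest.skip wrapper while keeping the environment-dependent guards. A self-contained sketch of the two decorator styles, with HAS_GPU as a hypothetical stand-in for the real capability check:

    import unittest

    HAS_GPU = False  # hypothetical stand-in for the real GPU/triton capability check

    class SkipPatternExample(unittest.TestCase):
        # unittest.skip disables a test unconditionally (the decorator removed above);
        # unittest.skipIf only skips when its condition evaluates to True.
        @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch")
        def test_ddp_tp_like(self):
            self.assertTrue(True)

    if __name__ == "__main__":
        unittest.main()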