Revert "Fix decorators skipping NCCL tests (#158846)"

This reverts commit 57024913c409764f129d6a7792625f5b05462e31.

Reverted https://github.com/pytorch/pytorch/pull/158846 on behalf of https://github.com/ZainRizvi due to Sorry but this is breaking trunk. See distributed/_composable/fsdp/test_fully_shard_logging.py::LoggingTests::test_fsdp_logging [GH job link](https://github.com/pytorch/pytorch/actions/runs/16472103496/job/46564570609) [HUD commit link](57024913c4) ([comment](https://github.com/pytorch/pytorch/pull/158846#issuecomment-3109553414))
Author: PyTorch MergeBot
Date:   2025-07-23 17:47:35 +00:00
Parent: 41b6cdaf76
Commit: 30b0ad5c68
5 changed files with 100 additions and 37 deletions
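
For context, a minimal sketch (not part of the patch) of the two skip styles this revert toggles between: the decorators restored below run the resource check inside the wrapped test and signal the skip by exiting with a reserved TEST_SKIPS exit code, which the multiprocess test harness translates back into a skip, while the #158846 variants being removed decide once, at decoration time, through the standard unittest skip machinery. The helper names skip_sketch_exit_style and skip_sketch_unittest_style are invented for illustration; TEST_SKIPS, wraps, and the device-count checks mirror the diff that follows.

import sys
import unittest
from functools import wraps

import torch

# Assumption for this sketch: TEST_SKIPS (a dict of named tuples with
# .exit_code and .message fields) is imported from the file this diff touches.
from torch.testing._internal.common_distributed import TEST_SKIPS


def skip_sketch_exit_style(x):
    # Style restored by the revert: check at call time, exit the (child) test
    # process with a reserved exit code when fewer than x GPUs are present.
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if torch.cuda.is_available() and torch.cuda.device_count() >= x:
                return func(*args, **kwargs)
            sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)

        return wrapper

    return decorator


def skip_sketch_unittest_style(x):
    # Style from #158846 (being reverted): decide once, at decoration time,
    # using unittest's own skip mechanism and the TEST_SKIPS message.
    return unittest.skipUnless(
        torch.cuda.device_count() >= x, TEST_SKIPS[f"multi-gpu-{x}"].message
    )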


@@ -118,17 +118,14 @@ def requires_ddp_rank(device):
     return device in DDP_RANK_DEVICES
 def exit_if_lt_x_gpu(x):
     if torch.cuda.device_count() < x:
         sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
 def skip_if_no_gpu(func):
     """Skips if the world size exceeds the number of GPUs, ensuring that if the
     test is run, each rank has its own GPU via ``torch.cuda.device(rank)``."""
     @wraps(func)
     def wrapper(*args, **kwargs):
+        if not (TEST_CUDA or TEST_HPU or TEST_XPU):
+            sys.exit(TEST_SKIPS["no_cuda"].exit_code)
         world_size = int(os.environ["WORLD_SIZE"])
         if TEST_CUDA and torch.cuda.device_count() < world_size:
             sys.exit(TEST_SKIPS[f"multi-gpu-{world_size}"].exit_code)
@@ -139,9 +136,7 @@ def skip_if_no_gpu(func):
         return func(*args, **kwargs)
-    return unittest.skipUnless(
-        TEST_CUDA or TEST_HPU or TEST_XPU, TEST_SKIPS["no_cuda"].message
-    )(wrapper)
+    return wrapper
 # TODO (kwen2501): what is the purpose of this decorator? Tests with this
@@ -173,16 +168,33 @@ def skip_if_odd_worldsize(func):
 def require_n_gpus_for_nccl_backend(n, backend):
-    return skip_if_lt_x_gpu(n) if backend == "nccl" else unittest.skipIf(False, None)
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if backend == "nccl" and torch.cuda.device_count() < n:
+                sys.exit(TEST_SKIPS[f"multi-gpu-{n}"].exit_code)
+            else:
+                return func(*args, **kwargs)
+        return wrapper
+    return decorator
 def import_transformers_or_skip():
-    try:
-        from transformers import AutoModelForMaskedLM, BertConfig  # noqa: F401
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                from transformers import AutoModelForMaskedLM, BertConfig  # noqa: F401
-        return unittest.skipIf(False)
-    except ImportError:
-        return unittest.skip(TEST_SKIPS["importerror"].message)
+                return func(*args, **kwargs)
+            except ImportError:
+                sys.exit(TEST_SKIPS["importerror"].exit_code)
+        return wrapper
+    return decorator
 def at_least_x_gpu(x):
@@ -196,7 +208,36 @@ def at_least_x_gpu(x):
 def skip_if_lt_x_gpu(x):
-    return unittest.skipUnless(at_least_x_gpu(x), TEST_SKIPS[f"multi-gpu-{x}"].message)
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if torch.cuda.is_available() and torch.cuda.device_count() >= x:
+                return func(*args, **kwargs)
+            if TEST_HPU and torch.hpu.device_count() >= x:
+                return func(*args, **kwargs)
+            if TEST_XPU and torch.xpu.device_count() >= x:
+                return func(*args, **kwargs)
+            sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
+        return wrapper
+    return decorator
+# This decorator helps avoiding initializing cuda while testing other backends
+def nccl_skip_if_lt_x_gpu(backend, x):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if backend != "nccl":
+                return func(*args, **kwargs)
+            if torch.cuda.is_available() and torch.cuda.device_count() >= x:
+                return func(*args, **kwargs)
+            sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
+        return wrapper
+    return decorator
 def verify_ddp_error_logged(model_DDP, err_substr):
@@ -372,7 +413,14 @@ def requires_multicast_support():
 def skip_if_rocm_multiprocess(func):
     """Skips a test for ROCm"""
     func.skip_if_rocm_multiprocess = True
-    return unittest.skipUnless(TEST_WITH_ROCM, TEST_SKIPS["skipIfRocm"].message)(func)
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if not TEST_WITH_ROCM:
+            return func(*args, **kwargs)
+        sys.exit(TEST_SKIPS["skipIfRocm"].exit_code)
+    return wrapper
 def skip_if_win32():
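
A hypothetical usage sketch (not from this patch) showing where these decorators are applied; the class and test names are made up. With the wrapper-based implementations restored above, a machine with too few GPUs exits the spawned test process with the matching TEST_SKIPS exit code, which MultiProcessTestCase reports as a skipped test rather than a failure.

from torch.testing._internal.common_distributed import (
    MultiProcessTestCase,
    skip_if_lt_x_gpu,
)


class MyShardingTest(MultiProcessTestCase):  # hypothetical test class
    @property
    def world_size(self) -> int:
        return 2

    @skip_if_lt_x_gpu(2)
    def test_needs_two_gpus(self):
        # On a host with fewer than two GPUs, the restored decorator calls
        # sys.exit(TEST_SKIPS["multi-gpu-2"].exit_code) inside the child
        # process instead of marking the test skipped at collection time.
        ...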