Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "Fix decorators skipping NCCL tests (#158846)"
This reverts commit 57024913c409764f129d6a7792625f5b05462e31.
Reverted https://github.com/pytorch/pytorch/pull/158846 on behalf of https://github.com/ZainRizvi due to Sorry but this is breaking trunk. See distributed/_composable/fsdp/test_fully_shard_logging.py::LoggingTests::test_fsdp_logging [GH job link](https://github.com/pytorch/pytorch/actions/runs/16472103496/job/46564570609) [HUD commit link](57024913c4) ([comment](https://github.com/pytorch/pytorch/pull/158846#issuecomment-3109553414))
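For context on what this revert toggles: the restored helpers run their checks inside the wrapped test body and leave the process with sys.exit(...), relying on the multiprocess test harness to interpret the exit code, while the reverted change expressed the same conditions as unittest.skipUnless / unittest.skipIf decorators that record a skip before the body ever runs. A minimal, self-contained sketch of the two styles; fake_gpu_count, the needs_two_gpus_* names, and the exit code 42 are invented for illustration and are not PyTorch APIs:

import sys
import unittest
from functools import wraps

fake_gpu_count = 1  # illustrative stand-in for torch.cuda.device_count()


def needs_two_gpus_exit(func):
    # Wrapper style (what this revert restores): the check runs only when the
    # test body executes, and the failure path leaves the process via sys.exit.
    @wraps(func)
    def wrapper(*args, **kwargs):
        if fake_gpu_count < 2:
            sys.exit(42)  # a harness would map this exit code to a skip
        return func(*args, **kwargs)

    return wrapper


def needs_two_gpus_skip(func):
    # Decorator style (what the reverted PR used): unittest records a skip up
    # front and never runs the body.
    return unittest.skipUnless(fake_gpu_count >= 2, "needs >= 2 GPUs")(func)


class DemoTest(unittest.TestCase):
    @needs_two_gpus_skip
    def test_reported_as_skipped(self):
        pass

    # Decorating with @needs_two_gpus_exit instead would terminate this process
    # on a 1-GPU machine, which only makes sense under a multiprocess harness.


if __name__ == "__main__":
    unittest.main()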
@@ -118,17 +118,14 @@ def requires_ddp_rank(device):
     return device in DDP_RANK_DEVICES


-def exit_if_lt_x_gpu(x):
-    if torch.cuda.device_count() < x:
-        sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
-
-
 def skip_if_no_gpu(func):
     """Skips if the world size exceeds the number of GPUs, ensuring that if the
     test is run, each rank has its own GPU via ``torch.cuda.device(rank)``."""

     @wraps(func)
     def wrapper(*args, **kwargs):
+        if not (TEST_CUDA or TEST_HPU or TEST_XPU):
+            sys.exit(TEST_SKIPS["no_cuda"].exit_code)
         world_size = int(os.environ["WORLD_SIZE"])
         if TEST_CUDA and torch.cuda.device_count() < world_size:
             sys.exit(TEST_SKIPS[f"multi-gpu-{world_size}"].exit_code)
@@ -139,9 +136,7 @@ def skip_if_no_gpu(func):

         return func(*args, **kwargs)

-    return unittest.skipUnless(
-        TEST_CUDA or TEST_HPU or TEST_XPU, TEST_SKIPS["no_cuda"].message
-    )(wrapper)
+    return wrapper


 # TODO (kwen2501): what is the purpose of this decorator? Tests with this
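A usage sketch for the restored skip_if_no_gpu: it reads WORLD_SIZE from the environment at run time and exits the rank's process when no accelerator is present or the ranks outnumber the devices. The test class is illustrative, the import path assumes the helpers' usual home in torch.testing._internal.common_distributed, and WORLD_SIZE is set by hand only so the sketch stands on its own (distributed launchers normally export it for every rank):

import os
import unittest

import torch
from torch.testing._internal.common_distributed import skip_if_no_gpu


class SketchTest(unittest.TestCase):
    @skip_if_no_gpu  # exits this process unless GPUs >= WORLD_SIZE
    def test_each_rank_gets_a_device(self):
        # Reaching this point means torch.cuda.device_count() >= WORLD_SIZE,
        # so rank r can safely claim device r.
        self.assertGreaterEqual(
            torch.cuda.device_count(), int(os.environ["WORLD_SIZE"])
        )


if __name__ == "__main__":
    os.environ.setdefault("WORLD_SIZE", "2")
    unittest.main()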
@@ -173,16 +168,33 @@ def skip_if_odd_worldsize(func):


 def require_n_gpus_for_nccl_backend(n, backend):
-    return skip_if_lt_x_gpu(n) if backend == "nccl" else unittest.skipIf(False, None)
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if backend == "nccl" and torch.cuda.device_count() < n:
+                sys.exit(TEST_SKIPS[f"multi-gpu-{n}"].exit_code)
+            else:
+                return func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator


 def import_transformers_or_skip():
-    try:
-        from transformers import AutoModelForMaskedLM, BertConfig  # noqa: F401
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                from transformers import AutoModelForMaskedLM, BertConfig  # noqa: F401

-        return unittest.skipIf(False)
-    except ImportError:
-        return unittest.skip(TEST_SKIPS["importerror"].message)
+                return func(*args, **kwargs)
+            except ImportError:
+                sys.exit(TEST_SKIPS["importerror"].exit_code)
+
+        return wrapper
+
+    return decorator


 def at_least_x_gpu(x):
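Both helpers are decorator factories, so call sites look the same before and after the revert; only the failure mode changes (a recorded unittest skip versus a process exit that the multiprocess harness has to translate). An illustrative sketch of the call sites, with an invented class, backend constant, and test bodies:

import unittest

from torch.testing._internal.common_distributed import (
    import_transformers_or_skip,
    require_n_gpus_for_nccl_backend,
)

BACKEND = "nccl"  # distributed suites usually parametrize this


class CallSiteSketch(unittest.TestCase):
    @require_n_gpus_for_nccl_backend(2, BACKEND)
    def test_needs_two_gpus_when_nccl(self):
        pass  # body only runs with >= 2 GPUs when BACKEND == "nccl"

    @import_transformers_or_skip()
    def test_uses_transformers(self):
        from transformers import BertConfig  # guarded by the decorator

        self.assertIsNotNone(BertConfig())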
@@ -196,7 +208,36 @@ def at_least_x_gpu(x):


 def skip_if_lt_x_gpu(x):
-    return unittest.skipUnless(at_least_x_gpu(x), TEST_SKIPS[f"multi-gpu-{x}"].message)
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if torch.cuda.is_available() and torch.cuda.device_count() >= x:
+                return func(*args, **kwargs)
+            if TEST_HPU and torch.hpu.device_count() >= x:
+                return func(*args, **kwargs)
+            if TEST_XPU and torch.xpu.device_count() >= x:
+                return func(*args, **kwargs)
+            sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
+
+        return wrapper
+
+    return decorator
+
+
+# This decorator helps avoiding initializing cuda while testing other backends
+def nccl_skip_if_lt_x_gpu(backend, x):
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            if backend != "nccl":
+                return func(*args, **kwargs)
+            if torch.cuda.is_available() and torch.cuda.device_count() >= x:
+                return func(*args, **kwargs)
+            sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)
+
+        return wrapper
+
+    return decorator


 def verify_ddp_error_logged(model_DDP, err_substr):
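skip_if_lt_x_gpu is likely the most familiar of these helpers in the distributed test suites; the restored nccl_ variant additionally takes the backend so that torch.cuda is never queried when the test is not running NCCL. A hedged usage sketch, with an illustrative test class and backend value and the same assumed import path as above:

import torch.distributed as dist
from torch.testing._internal.common_distributed import (
    MultiProcessTestCase,
    nccl_skip_if_lt_x_gpu,
    skip_if_lt_x_gpu,
)

BACKEND = dist.Backend.NCCL  # illustrative; often parametrized per test module


class TwoGpuSketch(MultiProcessTestCase):
    def setUp(self):
        super().setUp()
        self._spawn_processes()  # one subprocess per rank

    @skip_if_lt_x_gpu(2)
    def test_runs_only_with_two_accelerators(self):
        pass

    @nccl_skip_if_lt_x_gpu(BACKEND, 2)
    def test_gpu_check_applies_only_to_nccl(self):
        # With a non-NCCL backend this body runs without the guard ever
        # touching torch.cuda, so CUDA is not initialized just to evaluate it.
        pass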
@@ -372,7 +413,14 @@ def requires_multicast_support():
 def skip_if_rocm_multiprocess(func):
     """Skips a test for ROCm"""
     func.skip_if_rocm_multiprocess = True
-    return unittest.skipUnless(TEST_WITH_ROCM, TEST_SKIPS["skipIfRocm"].message)(func)
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if not TEST_WITH_ROCM:
+            return func(*args, **kwargs)
+        sys.exit(TEST_SKIPS["skipIfRocm"].exit_code)
+
+    return wrapper


 def skip_if_win32():
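What makes the sys.exit style workable at all is a convention between these decorators and the multiprocess harness: each TEST_SKIPS entry pairs a process exit code with a message, and the parent process that spawned the ranks is expected to turn a known exit code back into a reported skip. A minimal, self-contained sketch of that idea; the child command, the exit code 42, and the mapping are invented for illustration, while PyTorch's real mapping lives in TEST_SKIPS and its multiprocess test harness:

import subprocess
import sys
import unittest

# Invented mapping for the sketch: exit code -> skip message.
SKIP_EXIT_CODES = {42: "fewer than 2 GPUs available"}

CHILD_PROGRAM = "import sys; sys.exit(42)"  # stands in for a rank that bailed out


class HarnessSketch(unittest.TestCase):
    def test_child_exit_code_becomes_a_skip(self):
        proc = subprocess.run([sys.executable, "-c", CHILD_PROGRAM])
        if proc.returncode in SKIP_EXIT_CODES:
            # The translation step: without it, the sys.exit in the child
            # would surface as a failure or not be reported at all.
            raise unittest.SkipTest(SKIP_EXIT_CODES[proc.returncode])
        self.assertEqual(proc.returncode, 0)


if __name__ == "__main__":
    unittest.main()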