Revert "[distributed] Replace assert statements with AssertionError exceptions (#165216)"

This reverts commit 74db92b21868b7e9e77cc966e5d57a8246723cbd.

Reverted https://github.com/pytorch/pytorch/pull/165216 on behalf of https://github.com/clee2000 due to I think this broke distributed/test_pg_wrapper.py::ProcessGroupNCCLWrapperTest::test_debug_level_detail_no_gloo [GH job link](https://github.com/pytorch/pytorch/actions/runs/18492765290/job/52693842750) [HUD commit link](74db92b218), note to self: bad TD ([comment](https://github.com/pytorch/pytorch/pull/165216#issuecomment-3402838765))
Author: PyTorch MergeBot
Date:   2025-10-14 17:05:16 +00:00
Parent: 5eddbb5e47
Commit: d2494cbb2b

11 changed files with 136 additions and 222 deletions

@@ -792,12 +792,8 @@ def _get_logs_specs_class(logs_specs_name: Optional[str]) -> type[LogsSpecs]:
 def config_from_args(args) -> tuple[LaunchConfig, Union[Callable, str], list[str]]:
     # If ``args`` not passed, defaults to ``sys.argv[:1]``
     min_nodes, max_nodes = parse_min_max_nnodes(args.nnodes)
-    if not (0 < min_nodes <= max_nodes):
-        raise AssertionError(
-            f"min_nodes must be > 0 and <= max_nodes, got min_nodes={min_nodes}, max_nodes={max_nodes}"
-        )
-    if args.max_restarts < 0:
-        raise AssertionError("max_restarts must be >= 0")
+    assert 0 < min_nodes <= max_nodes
+    assert args.max_restarts >= 0
 
     if (
         hasattr(args, "master_addr")
@@ -837,8 +833,7 @@ def config_from_args(args) -> tuple[LaunchConfig, Union[Callable, str], list[str]]:
     if args.local_ranks_filter:
         try:
             ranks = set(map(int, args.local_ranks_filter.split(",")))
-            if not ranks:
-                raise AssertionError("ranks set cannot be empty")
+            assert ranks
         except Exception as e:
             raise ValueError(
                 "--local_ranks_filter must be a comma-separated list of integers e.g. --local_ranks_filter=0,1,2"