pytorch/torch/distributed/_distributed_c10d.py
Edward Yang 6c334885d4 [RELAND] Always build USE_DISTRIBUTED (#160449) and Make distributed modules importable even when backend not built (#159889) (#162594)
Summary:
Original: D81957844 and D81957923

Also, https://github.com/pytorch/pytorch/pull/162142 is patched in.

#buildall

Test Plan:
sandcastle and oss ci

Rollback Plan:

Reviewed By: H-Huang

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162594
Approved by: https://github.com/H-Huang, https://github.com/dcci
2025-09-12 10:54:42 +00:00

# mypy: disable-error-code="assignment"
# noqa: F401
"""
Centralized module for importing and re-exporting torch._C._distributed_c10d components.
IMPORTANT PATTERN:
Never access torch._C._distributed_c10d directly in code. Always import from and use
torch.distributed._distributed_c10d which is guaranteed to have all functions available.
Example:
# WRONG: torch._C._distributed_c10d._set_global_rank(rank)
# RIGHT:
from torch.distributed._distributed_c10d import _set_global_rank
_set_global_rank(rank)
"""

from typing import TYPE_CHECKING


# Import all core distributed components from the C extension
# NB: This list has to be spelled out because the _C module doesn't have __all__
from torch._C._distributed_c10d import (
    _allow_inflight_collective_as_graph_input,
    _broadcast_coalesced,
    _compute_bucket_assignment_by_size,
    _ControlCollectives,
    _current_process_group,
    _DEFAULT_FIRST_BUCKET_BYTES,
    _DEFAULT_PG_TIMEOUT,
    _DistributedBackendOptions,
    _make_nccl_premul_sum,
    _register_builtin_comm_hook,
    _register_comm_hook,
    _register_process_group,
    _register_work,
    _resolve_process_group,
    _set_allow_inflight_collective_as_graph_input,
    _set_global_rank,
    _set_process_group,
    _StoreCollectives,
    _test_python_store,
    _unregister_all_process_groups,
    _unregister_process_group,
    _verify_params_across_processes,
    _WorkerServer,
    AllgatherOptions,
    AllreduceCoalescedOptions,
    AllreduceOptions,
    AllToAllOptions,
    Backend,
    BarrierOptions,
    BroadcastOptions,
    BuiltinCommHookType,
    DebugLevel,
    FakeProcessGroup,
    FakeWork,
    FileStore,
    GatherOptions,
    get_debug_level,
    GradBucket,
    Logger,
    PrefixStore,
    ProcessGroup,
    ReduceOp,
    ReduceOptions,
    Reducer,
    ReduceScatterOptions,
    ScatterOptions,
    set_debug_level,
    set_debug_level_from_env,
    Store,
    TCPStore,
    Work,
)

# Backend-specific components that may not be available
_MPI_AVAILABLE = False
_NCCL_AVAILABLE = False
_GLOO_AVAILABLE = False
_UCC_AVAILABLE = False
_XCCL_AVAILABLE = False

# HashStore
try:
    from torch._C._distributed_c10d import HashStore
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import HashStore

# NVSHMEM/SymmetricMemory components
# There are multiple backends for SymmetricMemory; as a result, _SymmetricMemory
# should not be imported together with NVSHMEM-related modules.
try:
    from torch._C._distributed_c10d import _SymmetricMemory
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import _SymmetricMemory

try:
    from torch._C._distributed_c10d import (
        _is_nvshmem_available,
        _nvshmemx_cumodule_init,
    )
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import (
            _is_nvshmem_available,
            _nvshmemx_cumodule_init,
        )

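# Illustrative sketch (assumed caller-side pattern, not an API guarantee): code
# that needs NVSHMEM should gate on the runtime check rather than on whether the
# import above resolved to the real binding or the stub, e.g.
#
#     from torch.distributed._distributed_c10d import _is_nvshmem_available
#
#     if _is_nvshmem_available():
#         ...  # NVSHMEM-backed path
#     else:
#         ...  # non-NVSHMEM fallback
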
# MPI backend
try:
    from torch._C._distributed_c10d import ProcessGroupMPI

    _MPI_AVAILABLE = True
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import ProcessGroupMPI

# NCCL backend
try:
    from torch._C._distributed_c10d import (
        _DEFAULT_PG_NCCL_TIMEOUT,
        _dump_nccl_trace,
        _dump_nccl_trace_json,
        _hash_tensors,
        ProcessGroupNCCL,
    )

    _NCCL_AVAILABLE = True
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import (
            _DEFAULT_PG_NCCL_TIMEOUT,
            _dump_nccl_trace,
            _dump_nccl_trace_json,
            _hash_tensors,
            ProcessGroupNCCL,
        )

# Gloo backend
try:
    from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo

    _GLOO_AVAILABLE = True
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import _ProcessGroupWrapper, ProcessGroupGloo

# UCC backend
try:
    from torch._C._distributed_c10d import ProcessGroupUCC

    _UCC_AVAILABLE = True
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import ProcessGroupUCC

# XCCL backend
try:
    from torch._C._distributed_c10d import ProcessGroupXCCL

    _XCCL_AVAILABLE = True
except ImportError:
    if not TYPE_CHECKING:
        from torch.distributed._C_stubs import ProcessGroupXCCL

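# Illustrative sketch (assumed downstream pattern, not part of the public API):
# because the stub fallbacks above bind every name at runtime, importing a
# backend class always succeeds; consumers are expected to consult the
# availability flags before actually using a backend, e.g.
#
#     from torch.distributed._distributed_c10d import (
#         _GLOO_AVAILABLE,
#         _NCCL_AVAILABLE,
#     )
#
#     if _NCCL_AVAILABLE:
#         backend = "nccl"
#     elif _GLOO_AVAILABLE:
#         backend = "gloo"
#     else:
#         raise RuntimeError("no distributed backend was built")
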
# Provide backwards compatibility by making all symbols available at module level
__all__ = [
    # Basic components
    "_broadcast_coalesced",
    "_compute_bucket_assignment_by_size",
    "_ControlCollectives",
    "_DEFAULT_FIRST_BUCKET_BYTES",
    "_DEFAULT_PG_TIMEOUT",
    "_DEFAULT_PG_NCCL_TIMEOUT",
    "_make_nccl_premul_sum",
    "_register_builtin_comm_hook",
    "_register_comm_hook",
    "_StoreCollectives",
    "_test_python_store",
    "_verify_params_across_processes",
    "_allow_inflight_collective_as_graph_input",
    "_register_work",
    "_set_allow_inflight_collective_as_graph_input",
    "_is_nvshmem_available",
    "_nvshmemx_cumodule_init",
    "_SymmetricMemory",
    "_hash_tensors",
    "_set_global_rank",
    "_dump_nccl_trace",
    "_dump_nccl_trace_json",
    "Backend",
    "BuiltinCommHookType",
    "DebugLevel",
    "FakeProcessGroup",
    "FileStore",
    "get_debug_level",
    "GradBucket",
    "HashStore",
    "Logger",
    "PrefixStore",
    "ProcessGroup",
    "Reducer",
    "ReduceOp",
    "set_debug_level",
    "set_debug_level_from_env",
    "Store",
    "TCPStore",
    "Work",
    "FakeWork",
    # Additional distributed_c10d components
    "_DistributedBackendOptions",
    "_register_process_group",
    "_resolve_process_group",
    "_unregister_all_process_groups",
    "_unregister_process_group",
    "_current_process_group",
    "_set_process_group",
    "_WorkerServer",
    "AllgatherOptions",
    "AllreduceCoalescedOptions",
    "AllreduceOptions",
    "AllToAllOptions",
    "BarrierOptions",
    "BroadcastOptions",
    "GatherOptions",
    "ReduceOptions",
    "ReduceScatterOptions",
    "ScatterOptions",
    # Process group implementations
    "ProcessGroupMPI",
    "ProcessGroupNCCL",
    "ProcessGroupGloo",
    "ProcessGroupUCC",
    "ProcessGroupXCCL",
    "_ProcessGroupWrapper",
    # Availability flags
    "_MPI_AVAILABLE",
    "_NCCL_AVAILABLE",
    "_GLOO_AVAILABLE",
    "_UCC_AVAILABLE",
    "_XCCL_AVAILABLE",
]
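
# Illustrative sketch (assumed downstream usage of the backwards-compatibility
# surface above): because __all__ is spelled out explicitly, a star-import pulls
# in exactly these names and nothing else, e.g.
#
#     from torch.distributed._distributed_c10d import *  # noqa: F401,F403
#
#     # Names such as ProcessGroup, TCPStore, and Work are now in scope, while
#     # anything not listed in __all__ is not re-exported.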