Mirror of https://github.com/vllm-project/vllm.git, synced 2025-10-20 23:03:52 +08:00
Compare commits: v0.11.0rc1...tms/distri (2 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 2f86f710dd | |
| | feeb17303d | |
@@ -222,10 +222,15 @@ class GroupCoordinator:
         for ranks in group_ranks:
             device_group = torch.distributed.new_group(
-                ranks, backend=torch_distributed_backend)
+                ranks,
+                backend=torch_distributed_backend,
+                timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
             # a group with `gloo` backend, to allow direct coordination between
             # processes through the CPU.
-            cpu_group = torch.distributed.new_group(ranks, backend="gloo")
+            cpu_group = torch.distributed.new_group(
+                ranks,
+                backend="gloo",
+                timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
             if self.rank in ranks:
                 self.ranks = ranks
                 self.world_size = len(ranks)
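Both calls in this hunk (presumably vllm/distributed/parallel_state.py) forward the new value as `timeout=`. torch.distributed documents that parameter as a `datetime.timedelta`, while the environment variable holds an optional integer number of seconds, so a conversion along the following lines is implied somewhere; the `seconds_to_pg_timeout` helper below is hypothetical and not part of this diff.

```python
# Hypothetical helper, not part of this diff: wrap the optional seconds value
# from VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS into the datetime.timedelta that
# torch.distributed group constructors document for their `timeout` argument.
from datetime import timedelta
from typing import Optional


def seconds_to_pg_timeout(seconds: Optional[int]) -> Optional[timedelta]:
    """Return None to keep torch.distributed's backend default timeout."""
    if seconds is None:
        return None
    return timedelta(seconds=seconds)


# Example: 1800 -> a 30-minute window, None -> defer to torch's default.
assert seconds_to_pg_timeout(1800) == timedelta(minutes=30)
assert seconds_to_pg_timeout(None) is None
```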
@@ -965,7 +970,8 @@ def init_distributed_environment(
             backend=backend,
             init_method=distributed_init_method,
             world_size=world_size,
-            rank=rank)
+            rank=rank,
+            timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
     # set the local rank
     # local_rank is not available in torch ProcessGroup,
     # see https://github.com/pytorch/pytorch/issues/122816
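For context, a self-contained sketch of the same idea applied to `torch.distributed.init_process_group` as patched above: a single-process `gloo` group on a loopback rendezvous, with the port and the 30-minute fallback chosen for illustration rather than taken from this diff.

```python
import os
from datetime import timedelta

import torch.distributed as dist

# Illustrative assumption: fall back to an explicit 30-minute window when the
# variable is unset, instead of relying on a particular torch default.
_raw = os.getenv("VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS")
timeout = timedelta(seconds=int(_raw)) if _raw else timedelta(minutes=30)

dist.init_process_group(
    backend="gloo",                        # CPU-only backend for the sketch
    init_method="tcp://127.0.0.1:29500",   # hypothetical loopback rendezvous
    world_size=1,
    rank=0,
    timeout=timeout,                       # bounds rendezvous and collectives
)
dist.destroy_process_group()
```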
@@ -140,6 +140,7 @@ if TYPE_CHECKING:
     VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120
     VLLM_USE_CUDNN_PREFILL: bool = False
     VLLM_LOOPBACK_IP: str = ""
+    VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS: Optional[int] = None


 def get_default_cache_root():
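The declaration under `if TYPE_CHECKING:` (presumably in vllm/envs.py) only gives type checkers a name; at runtime, `envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS` is resolved lazily through a lookup table of callables. A simplified, self-contained sketch of that pattern follows; it is not the exact vllm/envs.py code.

```python
# Sketch of the envs-module pattern the declaration above plugs into: typed
# names exist only for type checkers, while attribute access is resolved
# lazily through a dict of zero-argument callables and PEP 562 __getattr__.
import os
from typing import TYPE_CHECKING, Any, Callable, Optional

if TYPE_CHECKING:
    VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS: Optional[int] = None

environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS":
    lambda: (None
             if os.getenv("VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS") is None
             else int(os.environ["VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS"])),
}


def __getattr__(name: str) -> Any:
    # Module-level __getattr__ (PEP 562): evaluate the variable on first use.
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module has no attribute {name!r}")
```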
@@ -505,6 +506,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_IMAGE_FETCH_TIMEOUT":
     lambda: int(os.getenv("VLLM_IMAGE_FETCH_TIMEOUT", "5")),

+    # Timeout for torch distributed calls
+    "VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS":
+    lambda: maybe_convert_int(os.getenv("VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS", None)),
+
     # Timeout for fetching videos when serving multimodal models
     # Default is 30 seconds
     "VLLM_VIDEO_FETCH_TIMEOUT":
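The new entry relies on a `maybe_convert_int` helper already referenced in the module. A minimal sketch of what such a helper does, assuming the usual "None stays None, otherwise parse as int" behavior:

```python
from typing import Optional


def maybe_convert_int(value: Optional[str]) -> Optional[int]:
    """Keep None as-is so the torch default timeout applies; otherwise parse
    the environment string as an integer number of seconds."""
    if value is None:
        return None
    return int(value)


# os.getenv(...) returns None when the variable is unset, so the env entry
# evaluates to None by default and to an int when the variable is exported.
assert maybe_convert_int(None) is None
assert maybe_convert_int("600") == 600
```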