Mirror of https://github.com/vllm-project/vllm.git

Compare commits: v0.10.2rc3...tms/distri (2 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 2f86f710dd |  |
|  | feeb17303d |  |
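The two commits on `tms/distri` thread a new, optional `VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS` environment variable through vLLM's `torch.distributed` setup: the `device_group`/`cpu_group` creation in `GroupCoordinator`, the `init_process_group` call in `init_distributed_environment`, and the variable's declaration and parsing in `vllm/envs.py`.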
`vllm/distributed/parallel_state.py`:

```diff
@@ -222,10 +222,15 @@ class GroupCoordinator:
         for ranks in group_ranks:
             device_group = torch.distributed.new_group(
-                ranks, backend=torch_distributed_backend)
+                ranks,
+                backend=torch_distributed_backend,
+                timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
             # a group with `gloo` backend, to allow direct coordination between
             # processes through the CPU.
-            cpu_group = torch.distributed.new_group(ranks, backend="gloo")
+            cpu_group = torch.distributed.new_group(
+                ranks,
+                backend="gloo",
+                timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
             if self.rank in ranks:
                 self.ranks = ranks
                 self.world_size = len(ranks)
```
```diff
@@ -965,7 +970,8 @@ def init_distributed_environment(
             backend=backend,
             init_method=distributed_init_method,
             world_size=world_size,
-            rank=rank)
+            rank=rank,
+            timeout=envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS)
     # set the local rank
     # local_rank is not available in torch ProcessGroup,
     # see https://github.com/pytorch/pytorch/issues/122816
```
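The same keyword exists on `torch.distributed.init_process_group`, where the timeout bounds the initial rendezvous against the store as well as later collectives on some backends. A self-contained toy showing the call shape under that assumption (single-process `gloo` group; the address, port, and timeout values are illustrative):

```python
from datetime import timedelta

import torch.distributed as dist

# Single-process gloo group purely to demonstrate the call shape; a real
# vLLM launch supplies backend/init_method/world_size/rank itself.
dist.init_process_group(
    backend="gloo",
    init_method="tcp://127.0.0.1:29500",
    world_size=1,
    rank=0,
    timeout=timedelta(seconds=600),
)
assert dist.is_initialized()
dist.destroy_process_group()
```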
`vllm/envs.py`:

```diff
@@ -140,6 +140,7 @@ if TYPE_CHECKING:
     VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120
     VLLM_USE_CUDNN_PREFILL: bool = False
     VLLM_LOOPBACK_IP: str = ""
+    VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS: Optional[int] = None


 def get_default_cache_root():
```
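For context on the `vllm/envs.py` hunks: the module declares typed names under `if TYPE_CHECKING:` for static checkers only, and resolves them at runtime through a module-level `__getattr__` over the `environment_variables` dict, so each attribute access re-reads the environment. A condensed sketch of that mechanism (not the literal file):

```python
import os
from typing import Any, Callable, Optional


def maybe_convert_int(value: Optional[str]) -> Optional[int]:
    # Unset variables stay None; set ones are parsed as base-10 integers.
    return None if value is None else int(value)


environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS":
    lambda: maybe_convert_int(
        os.getenv("VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS", None)),
}


def __getattr__(name: str) -> Any:
    # Called only for names not found normally, so each lookup of
    # envs.VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS re-evaluates the lambda.
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```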
```diff
@@ -505,6 +506,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_IMAGE_FETCH_TIMEOUT":
     lambda: int(os.getenv("VLLM_IMAGE_FETCH_TIMEOUT", "5")),
 
+    # Timeout (in seconds) for torch distributed initialization calls
+    "VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS":
+    lambda: maybe_convert_int(os.getenv("VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS", None)),
+
     # Timeout for fetching videos when serving multimodal models
     # Default is 30 seconds
     "VLLM_VIDEO_FETCH_TIMEOUT":
```
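With the diff applied, an operator on a slow-to-rendezvous cluster could widen the startup window before launching vLLM; leaving the variable unset keeps torch's default timeout (the value below is illustrative):

```python
import os

# Illustrative: allow up to 20 minutes for distributed initialization.
# Must be set before vLLM initializes torch.distributed.
os.environ["VLLM_DISTRIBUTED_INIT_TIMEOUT_SECONDS"] = "1200"
```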