Signed-off-by: Tyler Michael Smith <tysmith@redhat.com>
This commit is contained in:
Tyler Michael Smith
2025-07-11 14:42:44 +00:00
parent 5e53c89a74
commit ab153be252
2 changed files with 12 additions and 1 deletion

View File

@ -338,6 +338,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"CUDA_VISIBLE_DEVICES":
lambda: os.environ.get("CUDA_VISIBLE_DEVICES", None),
# used to control the visible devices in the distributed setting
"VLLM_VISIBLE_DEVICES":
lambda: os.environ.get("VLLM_VISIBLE_DEVICES", None),
# timeout for each iteration in the engine
"VLLM_ENGINE_ITERATION_TIMEOUT_S":
lambda: int(os.environ.get("VLLM_ENGINE_ITERATION_TIMEOUT_S", "60")),

View File

@ -135,7 +135,14 @@ class Worker(WorkerBase):
# This env var set by Ray causes exceptions with graph building.
os.environ.pop("NCCL_ASYNC_ERROR_HANDLING", None)
self.device = torch.device(f"cuda:{self.local_rank}")
device_id = self.local_rank
if envs.VLLM_VISIBLE_DEVICES is not None:
devices = [
int(dev) for dev in (x.strip() for x in envs.VLLM_VISIBLE_DEVICES.split(','))
]
device_id = devices[self.local_rank]
self.device = torch.device(f"cuda:{device_id}")
current_platform.set_device(self.device)
_check_if_gpu_supports_dtype(self.model_config.dtype)