[Distributed][Core] Support Py39 and Py38 for PP (#6120)
Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
committed by GitHub
parent 1dab9bc8a9
commit 0ed646b7aa
@@ -123,12 +123,7 @@ class ExecutorAsyncBase(ExecutorBase):
         multimodal_config: Optional[MultiModalConfig],
         speculative_config: Optional[SpeculativeConfig],
     ) -> None:
-        # This locks each pipeline parallel stage so multiple virtual engines
-        # can't execute on the same stage at the same time
-        self.pp_locks = [
-            asyncio.Lock()
-            for _ in range(parallel_config.pipeline_parallel_size)
-        ]
+        self.pp_locks: Optional[List[asyncio.Lock]] = None
 
         super().__init__(model_config, cache_config, parallel_config,
                          scheduler_config, device_config, load_config,
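
For context, a minimal standalone sketch (not part of this commit; EagerLockEngine and step are made-up names) of why the removed pattern fails on Python 3.8/3.9: on those versions asyncio.Lock() binds to whatever event loop is current at construction time, so awaiting the lock later from the loop that actually serves requests raises a RuntimeError about a future attached to a different event loop. Python 3.10+ defers the binding until first use, which is why the eager construction only broke on older interpreters.

import asyncio


class EagerLockEngine:
    """Illustration only: mirrors the constructor-time lock creation removed above."""

    def __init__(self) -> None:
        # On Python 3.8/3.9 this captures the current (not-yet-running) event
        # loop; on 3.10+ the lock does not bind to a loop until first use.
        self.lock = asyncio.Lock()

    async def step(self) -> None:
        # asyncio.run() below drives a fresh loop, so on Python 3.8/3.9 this
        # raises RuntimeError because the lock was bound to another loop.
        async with self.lock:
            await asyncio.sleep(0)


engine = EagerLockEngine()  # constructed outside any running event loop
asyncio.run(engine.step())  # works on 3.10+, fails on 3.8/3.9
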
@@ -349,6 +349,15 @@ class RayGPUExecutorAsync(RayGPUExecutor, DistributedGPUExecutorAsync):
         self,
         execute_model_req: Optional[ExecuteModelRequest] = None
     ) -> List[SamplerOutput]:
+        if self.pp_locks is None:
+            # This locks each pipeline parallel stage so multiple virtual
+            # engines can't execute on the same stage at the same time
+            # We create the locks here to avoid creating them in the constructor
+            # which uses a different asyncio loop.
+            self.pp_locks = [
+                asyncio.Lock()
+                for _ in range(self.parallel_config.pipeline_parallel_size)
+            ]
 
         async def _run_task_with_lock(task, lock, *args, **kwargs):
             async with lock:
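
Outside the diff, a self-contained sketch of the lazy-initialization pattern the commit adopts (PipelineExecutor and execute are illustrative names, not the vLLM API): creating the locks inside the coroutine guarantees they bind to the event loop that actually runs the pipeline stages, so the same code behaves the same on Python 3.8/3.9 and on newer versions.

import asyncio
from typing import List, Optional


class PipelineExecutor:
    """Illustration only: lazily created per-stage locks, as in the hunk above."""

    def __init__(self, pipeline_parallel_size: int) -> None:
        self.pipeline_parallel_size = pipeline_parallel_size
        # Deferred so the locks are not tied to whatever loop is current here.
        self.pp_locks: Optional[List[asyncio.Lock]] = None

    async def execute(self, stage: int) -> None:
        if self.pp_locks is None:
            # First call: the running loop is the one that will use the locks.
            self.pp_locks = [
                asyncio.Lock() for _ in range(self.pipeline_parallel_size)
            ]
        async with self.pp_locks[stage]:
            await asyncio.sleep(0)  # stand-in for the real per-stage work


async def main() -> None:
    executor = PipelineExecutor(pipeline_parallel_size=2)
    # Two virtual engines may target the same stage; the lock serializes them.
    await asyncio.gather(executor.execute(0), executor.execute(0))


asyncio.run(main())
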