mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
Fix V1 engine serialization error with Ray distributed executor (#26148)
Signed-off-by: Nikhil Ghosh <nikhil@anyscale.com>
This commit is contained in:
@ -16,6 +16,7 @@ from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.sequence import ExecuteModelRequest, IntermediateTensors
|
||||
from vllm.utils import get_ip
|
||||
from vllm.v1.outputs import AsyncModelRunnerOutput
|
||||
from vllm.v1.worker.worker_base import WorkerWrapperBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -142,6 +143,11 @@ try:
|
||||
# but may still be finished requests.
|
||||
assert not output or not output.req_ids
|
||||
output = scheduler_output, None
|
||||
# Ensure outputs crossing Ray compiled DAG are serializable.
|
||||
# AsyncModelRunnerOutput holds CUDA events and cannot be
|
||||
# pickled.
|
||||
if isinstance(output, AsyncModelRunnerOutput):
|
||||
output = output.get_output()
|
||||
return output
|
||||
|
||||
def override_env_vars(self, vars: Dict[str, str]):
|
||||
|
Reference in New Issue
Block a user