mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[V1] Simplify Shutdown (#11659)
This commit is contained in:
@ -142,9 +142,6 @@ def test_engine_core_client(monkeypatch, multiprocessing_mode: bool):
|
||||
|
||||
client.abort_requests([request.request_id])
|
||||
|
||||
# Shutdown the client.
|
||||
client.shutdown()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_engine_core_client_asyncio(monkeypatch):
|
||||
@ -200,6 +197,3 @@ async def test_engine_core_client_asyncio(monkeypatch):
|
||||
else:
|
||||
assert len(outputs[req_id]) == MAX_TOKENS, (
|
||||
f"{len(outputs[req_id])=}, {MAX_TOKENS=}")
|
||||
|
||||
# Shutdown the client.
|
||||
client.shutdown()
|
||||
|
@ -232,11 +232,6 @@ class LLM:
|
||||
|
||||
self.request_counter = Counter()
|
||||
|
||||
def __del__(self):
|
||||
if hasattr(self, 'llm_engine') and self.llm_engine and hasattr(
|
||||
self.llm_engine, "shutdown"):
|
||||
self.llm_engine.shutdown()
|
||||
|
||||
@staticmethod
|
||||
def get_engine_class() -> Type[LLMEngine]:
|
||||
if envs.VLLM_USE_V1:
|
||||
|
@ -103,9 +103,6 @@ class AsyncLLM(EngineClient):
|
||||
|
||||
self.output_handler: Optional[asyncio.Task] = None
|
||||
|
||||
def __del__(self):
|
||||
self.shutdown()
|
||||
|
||||
@classmethod
|
||||
def from_engine_args(
|
||||
cls,
|
||||
|
@ -203,7 +203,6 @@ class EngineCoreProc(EngineCore):
|
||||
finally:
|
||||
if engine_core is not None:
|
||||
engine_core.shutdown()
|
||||
engine_core = None
|
||||
|
||||
def run_busy_loop(self):
|
||||
"""Core busy loop of the EngineCore."""
|
||||
|
@ -1,4 +1,6 @@
|
||||
from typing import List, Optional, Type
|
||||
import weakref
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Type
|
||||
|
||||
import msgspec
|
||||
import zmq
|
||||
@ -18,7 +20,7 @@ from vllm.v1.utils import BackgroundProcHandle
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
class EngineCoreClient:
|
||||
class EngineCoreClient(ABC):
|
||||
"""
|
||||
EngineCoreClient: subclasses handle different methods for pushing
|
||||
and pulling from the EngineCore for asyncio / multiprocessing.
|
||||
@ -52,8 +54,9 @@ class EngineCoreClient:
|
||||
|
||||
return InprocClient(vllm_config, executor_class, log_stats)
|
||||
|
||||
@abstractmethod
|
||||
def shutdown(self):
|
||||
pass
|
||||
...
|
||||
|
||||
def get_output(self) -> List[EngineCoreOutput]:
|
||||
raise NotImplementedError
|
||||
@ -107,9 +110,6 @@ class InprocClient(EngineCoreClient):
|
||||
def shutdown(self):
|
||||
self.engine_core.shutdown()
|
||||
|
||||
def __del__(self):
|
||||
self.shutdown()
|
||||
|
||||
def profile(self, is_start: bool = True) -> None:
|
||||
self.engine_core.profile(is_start)
|
||||
|
||||
@ -139,10 +139,14 @@ class MPClient(EngineCoreClient):
|
||||
self.decoder = msgspec.msgpack.Decoder(EngineCoreOutputs)
|
||||
|
||||
# ZMQ setup.
|
||||
if asyncio_mode:
|
||||
self.ctx = zmq.asyncio.Context()
|
||||
else:
|
||||
self.ctx = zmq.Context() # type: ignore[attr-defined]
|
||||
self.ctx = (
|
||||
zmq.asyncio.Context() # type: ignore[attr-defined]
|
||||
if asyncio_mode else zmq.Context()) # type: ignore[attr-defined]
|
||||
|
||||
# Note(rob): shutdown function cannot be a bound method,
|
||||
# else the gc cannot collect the object.
|
||||
self._finalizer = weakref.finalize(self, lambda x: x.destroy(linger=0),
|
||||
self.ctx)
|
||||
|
||||
# Paths and sockets for IPC.
|
||||
output_path = get_open_zmq_ipc_path()
|
||||
@ -153,7 +157,6 @@ class MPClient(EngineCoreClient):
|
||||
zmq.constants.PUSH)
|
||||
|
||||
# Start EngineCore in background process.
|
||||
self.proc_handle: Optional[BackgroundProcHandle]
|
||||
self.proc_handle = BackgroundProcHandle(
|
||||
input_path=input_path,
|
||||
output_path=output_path,
|
||||
@ -166,12 +169,11 @@ class MPClient(EngineCoreClient):
|
||||
})
|
||||
|
||||
def shutdown(self):
|
||||
# Shut down the zmq context.
|
||||
self.ctx.destroy(linger=0)
|
||||
|
||||
if hasattr(self, "proc_handle") and self.proc_handle:
|
||||
"""Clean up background resources."""
|
||||
if hasattr(self, "proc_handle"):
|
||||
self.proc_handle.shutdown()
|
||||
self.proc_handle = None
|
||||
|
||||
self._finalizer()
|
||||
|
||||
|
||||
class SyncMPClient(MPClient):
|
||||
|
@ -205,10 +205,3 @@ class LLMEngine:
|
||||
f"found type: {type(tokenizer_group)}")
|
||||
|
||||
return tokenizer_group
|
||||
|
||||
def __del__(self):
|
||||
self.shutdown()
|
||||
|
||||
def shutdown(self):
|
||||
if engine_core := getattr(self, "engine_core", None):
|
||||
engine_core.shutdown()
|
||||
|
@ -1,3 +1,4 @@
|
||||
import multiprocessing
|
||||
import os
|
||||
import weakref
|
||||
from collections.abc import Sequence
|
||||
@ -91,8 +92,6 @@ class BackgroundProcHandle:
|
||||
target_fn: Callable,
|
||||
process_kwargs: Dict[Any, Any],
|
||||
):
|
||||
self._finalizer = weakref.finalize(self, self.shutdown)
|
||||
|
||||
context = get_mp_context()
|
||||
reader, writer = context.Pipe(duplex=False)
|
||||
|
||||
@ -102,11 +101,11 @@ class BackgroundProcHandle:
|
||||
process_kwargs["ready_pipe"] = writer
|
||||
process_kwargs["input_path"] = input_path
|
||||
process_kwargs["output_path"] = output_path
|
||||
self.input_path = input_path
|
||||
self.output_path = output_path
|
||||
|
||||
# Run Detokenizer busy loop in background process.
|
||||
# Run busy loop in background process.
|
||||
self.proc = context.Process(target=target_fn, kwargs=process_kwargs)
|
||||
self._finalizer = weakref.finalize(self, shutdown, self.proc,
|
||||
input_path, output_path)
|
||||
self.proc.start()
|
||||
|
||||
# Wait for startup.
|
||||
@ -114,21 +113,24 @@ class BackgroundProcHandle:
|
||||
raise RuntimeError(f"{process_name} initialization failed. "
|
||||
"See root cause above.")
|
||||
|
||||
def __del__(self):
|
||||
self.shutdown()
|
||||
|
||||
def shutdown(self):
|
||||
# Shutdown the process if needed.
|
||||
if hasattr(self, "proc") and self.proc.is_alive():
|
||||
self.proc.terminate()
|
||||
self.proc.join(5)
|
||||
self._finalizer()
|
||||
|
||||
if self.proc.is_alive():
|
||||
kill_process_tree(self.proc.pid)
|
||||
|
||||
# Remove zmq ipc socket files
|
||||
ipc_sockets = [self.output_path, self.input_path]
|
||||
for ipc_socket in ipc_sockets:
|
||||
socket_file = ipc_socket.replace("ipc://", "")
|
||||
if os and os.path.exists(socket_file):
|
||||
os.remove(socket_file)
|
||||
# Note(rob): shutdown function cannot be a bound method,
|
||||
# else the gc cannot collect the object.
|
||||
def shutdown(proc: multiprocessing.Process, input_path: str, output_path: str):
    """Stop the background process and delete its ZMQ IPC socket files.

    This is a plain module-level function (not a bound method) so that it can
    be registered with ``weakref.finalize`` without keeping the owning handle
    alive.

    Args:
        proc: The background process to stop. It is asked to terminate and is
            given 5 seconds to exit; if it is still alive after that, its
            whole process tree is killed via ``kill_process_tree``.
        input_path: ZMQ IPC address (``ipc://<file>``) of the input socket.
        output_path: ZMQ IPC address (``ipc://<file>``) of the output socket.
    """
    # Shutdown the process.
    if proc.is_alive():
        proc.terminate()
        proc.join(5)

        # Escalate only if the polite terminate + bounded join was not enough.
        if proc.is_alive():
            kill_process_tree(proc.pid)

    # Remove zmq ipc socket files.
    for ipc_socket in (output_path, input_path):
        socket_file = ipc_socket.replace("ipc://", "")
        # NOTE(review): the truthiness check on `os` is kept from the original;
        # presumably it guards against module globals already being cleared
        # when this runs late during interpreter teardown -- confirm.
        if not os:
            continue
        try:
            # Attempt the removal directly instead of exists()-then-remove():
            # the file may vanish between the two calls, and a finalizer
            # should not raise just because the socket is already gone.
            os.remove(socket_file)
        except FileNotFoundError:
            pass
|
||||
|
Reference in New Issue
Block a user