mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
44
vllm/entrypoints/disaggregated/worker.py
Normal file
44
vllm/entrypoints/disaggregated/worker.py
Normal file
@ -0,0 +1,44 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import uvloop
|
||||
|
||||
from vllm.disaggregated.pd_worker import PDWorker
|
||||
from vllm.engine.async_llm_engine import AsyncEngineArgs
|
||||
from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.openai.api_server import build_async_engine_client
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
from vllm.version import __version__ as VLLM_VERSION
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
async def run(args, engine: EngineClient):
|
||||
try:
|
||||
worker = PDWorker(engine, args.worker_addr, args.client_addr)
|
||||
await worker.run_busy_loop()
|
||||
finally:
|
||||
worker.shutdown()
|
||||
|
||||
|
||||
async def main(args) -> None:
|
||||
logger.info("vLLM P/D Worker Server %s", VLLM_VERSION)
|
||||
logger.info("Args: %s", args)
|
||||
|
||||
args.disable_frontend_multiprocessing = False
|
||||
async with build_async_engine_client(args) as engine:
|
||||
await run(args, engine)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = FlexibleArgumentParser()
|
||||
parser.add_argument('--client-addr',
|
||||
type=str,
|
||||
required=True,
|
||||
help='The address of the connector.')
|
||||
parser.add_argument('--worker-addr',
|
||||
type=str,
|
||||
required=True,
|
||||
help='The address of the worker.')
|
||||
AsyncEngineArgs.add_cli_args(parser)
|
||||
uvloop.run(main(parser.parse_args()))
|
Reference in New Issue
Block a user