!184 openmind-cli deploy: add support for the vLLM inference engine

Merge pull request !184 from 张烨槟/deploy_vllm
张烨槟
2025-05-22 11:02:03 +00:00
committed by i-robot
parent 74847c1f7c
commit 01161e07ac
3 changed files with 101 additions and 2 deletions


@@ -19,7 +19,6 @@ from openmind.flow.arguments import initialize_openmind, get_args
def run_deploy(**kwargs):
    # stop Mindie container
    if len(sys.argv) == 3 and sys.argv[-1] == "stop":
        DeployMindie.stop_service(remind=True)
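
In other words, a trailing "stop" argument (e.g. `openmind-cli deploy stop`, assuming the entry point name from the PR title) shuts down the running MindIE container before any backend dispatch happens.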
@@ -38,5 +37,14 @@ def run_deploy(**kwargs):
        DeployMindie(args).deploy()
    elif args.backend == "lmdeploy":
        DeployLMDeploy(args).deploy()
    elif args.backend == "vllm":
        import torch

        # Plain string comparison misorders versions (e.g. "2.10" < "2.5.1"),
        # so compare parsed versions and ignore any local build suffix.
        from packaging import version

        if version.parse(torch.__version__.split("+")[0]) >= version.parse("2.5.1"):
            from ..flow.deploy.vllm import DeployvLLM

            DeployvLLM(args).deploy()
        else:
            raise ImportError(f"Required torch version >= 2.5.1, but found {torch.__version__}")
    else:
        raise ValueError("backend only supports mindie, vllm and lmdeploy.")


@@ -752,4 +752,10 @@ def _add_deploy_args(parser):
        type=int,
        help="npu world size",
    )
    group.add_argument(
        "--backend_config",
        default=None,
        type=str,
        help="custom parameters for different backends",
    )
    return parser
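
The new flag takes a comma-separated key=value string that DeployvLLM._parse_args (below) turns into a dictionary; for example, `--backend_config "tensor_parallel_size=2,gpu_memory_utilization=0.9"` would forward those two engine options to vLLM. The key names here are illustrative vLLM options; the flag itself does not validate them.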


@@ -0,0 +1,85 @@
# Copyright (c) 2024 Huawei Technologies Co., Ltd.
#
# openMind is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#
# http://license.coscl.org.cn/MulanPSL2
#
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.
import argparse
import ast
import json
import re

import uvloop
from vllm.entrypoints.openai.api_server import run_server
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args

from openmind.utils import logging
from openmind.flow.model.loader import get_init_kwargs

logger = logging.get_logger()
logging.set_verbosity_info()


class DeployvLLM:
    def __init__(self, args: argparse.Namespace):
        if args.backend_config:
            vllm_args = self._parse_args(args.backend_config)
        else:
            vllm_args = {}
        _ = get_init_kwargs()  # result intentionally unused
        vllm_args["model"] = args.model_name_or_path
        # vllm_args is a dict, so use dict lookups here: getattr() on a dict
        # always falls through to the default and would discard user values.
        # Explicit CLI arguments take precedence over backend_config entries.
        vllm_args["host"] = vllm_args.get("host", "127.0.0.1")
        vllm_args["port"] = getattr(args, "port", vllm_args.get("port", 1025))
        vllm_args["trust_remote_code"] = getattr(
            args, "trust_remote_code", vllm_args.get("trust_remote_code", True)
        )
        # Build a full vLLM server namespace, overlaying the merged options.
        parser = argparse.ArgumentParser()
        parser = make_arg_parser(parser)
        self.args = parser.parse_args(args=[], namespace=argparse.Namespace(**vllm_args))
        validate_parsed_serve_args(self.args)

    @staticmethod
    def _parse_args(backend_config_str: str) -> dict:
        """Parse the backend_config string into a dictionary.

        Args:
            backend_config_str (str): A string containing comma-separated key-value pairs.

        Returns:
            dict: A dictionary of parsed parameters with reasonable data types.
        """
        # A value is either a brace-delimited block (so embedded commas survive)
        # or anything up to the next comma.
        pattern = re.compile(r"(\w+)=(\{.*\}|[^,]+)")
        matches = pattern.findall(backend_config_str)
        parsed_config = {}
        for key, value in matches:
            key = key.strip()
            value = value.strip()
            try:
                # Covers ints, floats, booleans, and other Python literals.
                parsed_value = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                parsed_value = value
            if key == "generation_parameters" and isinstance(parsed_value, str):
                # Quote bare keys ({temperature:0.7} -> {"temperature":0.7})
                # so the block becomes valid JSON.
                gen_params = re.sub(r"(\w+):", r'"\1":', parsed_value)
                try:
                    parsed_value = json.loads(gen_params)
                except json.JSONDecodeError:
                    pass  # keep the raw string if it still is not valid JSON
            parsed_config[key] = parsed_value
        return parsed_config

    def deploy(self):
        # run_server is a coroutine; uvloop.run drives it to completion.
        uvloop.run(run_server(self.args))
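
A minimal sketch of how _parse_args behaves on a sample string (the keys are illustrative, and generation_parameters uses the bare-key brace syntax the regex above expects):

    # Assuming the new module is importable as openmind.flow.deploy.vllm.
    from openmind.flow.deploy.vllm import DeployvLLM

    config = DeployvLLM._parse_args(
        "tensor_parallel_size=2,trust_remote_code=True,"
        "generation_parameters={temperature:0.7,top_p:0.9}"
    )
    assert config["tensor_parallel_size"] == 2      # ast.literal_eval -> int
    assert config["trust_remote_code"] is True      # ast.literal_eval -> bool
    assert config["generation_parameters"] == {"temperature": 0.7, "top_p": 0.9}

Scalar values go through ast.literal_eval, while the generation_parameters block falls back to the quote-keys-then-JSON path, which is why its numbers come back as floats in a nested dict.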