!184 openmind-cli deploy新增支持vllm推理引擎
Merge pull request !184 from 张烨槟/deploy_vllm
This commit is contained in:
@ -19,7 +19,6 @@ from openmind.flow.arguments import initialize_openmind, get_args
|
||||
|
||||
|
||||
def run_deploy(**kwargs):
|
||||
|
||||
# stop Mindie container
|
||||
if len(sys.argv) == 3 and sys.argv[-1] == "stop":
|
||||
DeployMindie.stop_service(remind=True)
|
||||
@ -38,5 +37,14 @@ def run_deploy(**kwargs):
|
||||
DeployMindie(args).deploy()
|
||||
elif args.backend == "lmdeploy":
|
||||
DeployLMDeploy(args).deploy()
|
||||
elif args.backend == "vllm":
|
||||
import torch
|
||||
|
||||
if torch.__version__ >= "2.5.1":
|
||||
from ..flow.deploy.vllm import DeployvLLM
|
||||
|
||||
DeployvLLM(args).deploy()
|
||||
else:
|
||||
raise ImportError(f"Required torch version >= 2.5.1, but found {torch.__version__}")
|
||||
else:
|
||||
raise ValueError("backend only supports mindie and lmdeploy.")
|
||||
raise ValueError("backend only supports mindie, vllm and lmdeploy.")
|
||||
|
@ -752,4 +752,10 @@ def _add_deploy_args(parser):
|
||||
type=int,
|
||||
help="npu world size",
|
||||
)
|
||||
group.add_argument(
|
||||
"--backend_config",
|
||||
default=None,
|
||||
type=str,
|
||||
help="custom parameters for different backends",
|
||||
)
|
||||
return parser
|
||||
|
85
src/openmind/flow/deploy/vllm.py
Normal file
85
src/openmind/flow/deploy/vllm.py
Normal file
@ -0,0 +1,85 @@
|
||||
# Copyright (c) 2024 Huawei Technologies Co., Ltd.
|
||||
#
|
||||
# openMind is licensed under Mulan PSL v2.
|
||||
# You can use this software according to the terms and conditions of the Mulan PSL v2.
|
||||
# You may obtain a copy of Mulan PSL v2 at:
|
||||
#
|
||||
# http://license.coscl.org.cn/MulanPSL2
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
# See the Mulan PSL v2 for more details.
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import json
|
||||
import ast
|
||||
import uvloop
|
||||
|
||||
from vllm.entrypoints.openai.api_server import run_server
|
||||
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
|
||||
|
||||
from openmind.utils import logging
|
||||
from openmind.flow.model.loader import get_init_kwargs
|
||||
|
||||
|
||||
logger = logging.get_logger()
|
||||
logging.set_verbosity_info()
|
||||
|
||||
|
||||
class DeployvLLM:
    """Deploy a model as an OpenAI-compatible HTTP server using the vLLM engine."""

    def __init__(self, args: argparse.Namespace):
        """Build the vLLM server argument namespace from openmind CLI args.

        Args:
            args: openmind deploy arguments. ``args.backend_config`` may carry
                comma-separated vLLM-specific overrides (see ``_parse_args``).
        """
        if args.backend_config:
            vllm_args = self._parse_args(args.backend_config)
        else:
            vllm_args = {}

        # Called for its side effect only (result discarded) — presumably it
        # resolves/prepares the model so the path below is usable.
        # NOTE(review): confirm against get_init_kwargs's implementation.
        _ = get_init_kwargs()
        vllm_args["model"] = args.model_name_or_path
        # BUGFIX: the original used getattr() on `vllm_args`, which is a dict;
        # getattr never finds dict keys, so user overrides supplied through
        # --backend_config (host/port/trust_remote_code) were silently dropped
        # and the hard-coded defaults always won. Use dict lookups instead.
        vllm_args.setdefault("host", "127.0.0.1")
        # CLI --port (when present) still takes precedence over backend_config.
        vllm_args["port"] = getattr(args, "port", vllm_args.get("port", 1025))
        vllm_args["trust_remote_code"] = getattr(
            args, "trust_remote_code", vllm_args.get("trust_remote_code", True)
        )

        # Let vLLM's own parser fill in defaults for every option not set above.
        parser = argparse.ArgumentParser()
        parser = make_arg_parser(parser)
        self.args = parser.parse_args(args=[], namespace=argparse.Namespace(**vllm_args))

        validate_parsed_serve_args(self.args)

    @staticmethod
    def _parse_args(backend_config_str: str) -> dict:
        """Parse the backend_config string into a dictionary.

        Args:
            backend_config_str (str): A string of comma-separated key-value pairs,
                e.g. ``"max_model_len=4096,dtype=float16"``. A value may itself be
                a brace-enclosed mapping (used for ``generation_parameters``).

        Returns:
            dict: A dictionary of parsed parameters with reasonable data types
            (ints/floats/bools via ``ast.literal_eval``; otherwise raw strings).
        """
        # A value is either a whole {...} group or anything up to the next comma.
        pattern = re.compile(r"(\w+)=(\{.*\}|[^,]+)")
        matches = pattern.findall(backend_config_str)

        parsed_config = {}
        for key, value in matches:
            key = key.strip()
            value = value.strip()

            # Best-effort typing: numbers, booleans, quoted strings, literals.
            try:
                parsed_value = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                parsed_value = value

            if key == "generation_parameters" and isinstance(parsed_value, str):
                # Bare keys like {temperature:0.7} are not valid JSON — quote
                # them so json.loads can produce a real dict.
                gen_params = re.sub(r"(\w+):", r'"\1":', parsed_value)
                try:
                    parsed_value = json.loads(gen_params)
                except json.JSONDecodeError:
                    # Still not valid JSON: keep the raw string as-is.
                    pass

            parsed_config[key] = parsed_value

        return parsed_config

    def deploy(self):
        """Start the OpenAI-compatible vLLM API server (blocks until shutdown)."""
        uvloop.run(run_server(self.args))
|
Reference in New Issue
Block a user