mirror of https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00

Compare commits: v0.11.1rc1...7snzwi-cod (1 commit)

Commit ef412c4657
````diff
@@ -253,7 +253,7 @@ export https_proxy=http://your.proxy.server:port
 https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
 
 # or use vllm cmd directly
-https_proxy=http://your.proxy.server:port vllm serve <model_name> --disable-log-requests
+https_proxy=http://your.proxy.server:port vllm serve <model_name>
 ```
 
 - Set the proxy in Python interpreter:
````
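The doc section touched above continues with setting the proxy from inside the Python interpreter. A minimal sketch of that approach, reusing the placeholder proxy address from the example (the exact snippet in the docs may differ):

```python
import os

# Configure the proxy before anything triggers a network request
# (e.g. downloading model weights).
os.environ["http_proxy"] = "http://your.proxy.server:port"
os.environ["https_proxy"] = "http://your.proxy.server:port"

from vllm import LLM  # noqa: E402  (import after the proxy is configured)

llm = LLM(model="<model_name>")
```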
```diff
@@ -290,6 +290,22 @@ def test_prefix_cache_default():
     assert not engine_args.enable_prefix_caching
 
 
+def test_log_request_default_and_legacy_flag(caplog_vllm):
+    parser = AsyncEngineArgs.add_cli_args(FlexibleArgumentParser())
+
+    args = parser.parse_args([])
+    caplog_vllm.clear()
+    engine_args = AsyncEngineArgs.from_cli_args(args=args)
+    assert engine_args.disable_log_requests
+    assert "--disable-log-requests" in caplog_vllm.text
+
+    caplog_vllm.clear()
+    args = parser.parse_args(["--enable-legacy-log-requests"])
+    engine_args = AsyncEngineArgs.from_cli_args(args=args)
+    assert not engine_args.disable_log_requests
+    assert caplog_vllm.text == ""
+
+
 # yapf: disable
 @pytest.mark.parametrize(("arg", "expected", "option"), [
     (None, None, "mm-processor-kwargs"),
```
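The new test relies on a `caplog_vllm` fixture whose definition is not part of this diff. A hypothetical sketch of such a fixture, assuming it simply attaches pytest's `caplog` handler to the `vllm` logger (the real fixture in the vLLM test suite may be defined differently):

```python
# Hypothetical fixture sketch; not taken from this diff.
import logging

import pytest


@pytest.fixture()
def caplog_vllm(caplog):
    """Capture records emitted on the 'vllm' logger via pytest's caplog."""
    vllm_logger = logging.getLogger("vllm")
    vllm_logger.addHandler(caplog.handler)
    try:
        yield caplog
    finally:
        vllm_logger.removeHandler(caplog.handler)
```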
```diff
@@ -881,6 +881,17 @@ class EngineArgs:
         attrs = [attr.name for attr in dataclasses.fields(cls)]
         # Set the attributes from the parsed arguments.
         engine_args = cls(**{attr: getattr(args, attr) for attr in attrs})
+        if isinstance(engine_args, AsyncEngineArgs):
+            if getattr(args, 'disable_log_requests', False):
+                engine_args.disable_log_requests = True
+            elif getattr(args, 'enable_legacy_log_requests', False):
+                engine_args.disable_log_requests = False
+            else:
+                engine_args.disable_log_requests = True
+                logger.warning(
+                    "'--disable-log-requests' is now enabled by default. "
+                    "Use --enable-legacy-log-requests to restore the previous "
+                    "request logging behavior.")
         return engine_args
 
     def create_model_config(self) -> ModelConfig:
```
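Read together, the branches above give an explicit `--disable-log-requests` precedence, `--enable-legacy-log-requests` is the only way to turn per-request logging back on, and the warning fires only when neither flag is passed. A self-contained illustration of that precedence using plain `argparse` (the helper names below are invented for the example and are not vLLM APIs):

```python
import argparse
import warnings


def _make_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    parser.add_argument("--disable-log-requests", action="store_true")
    parser.add_argument("--enable-legacy-log-requests", action="store_true")
    return parser


def _resolve_disable_log_requests(args: argparse.Namespace) -> bool:
    if args.disable_log_requests:
        return True   # explicit flag, now redundant with the default
    if args.enable_legacy_log_requests:
        return False  # opt back into per-request logging
    warnings.warn("'--disable-log-requests' is now enabled by default.")
    return True       # new default: requests are not logged


assert _resolve_disable_log_requests(_make_parser().parse_args([])) is True
assert _resolve_disable_log_requests(
    _make_parser().parse_args(["--enable-legacy-log-requests"])) is False
```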
```diff
@@ -1701,7 +1712,8 @@ class EngineArgs:
 @dataclass
 class AsyncEngineArgs(EngineArgs):
     """Arguments for asynchronous vLLM engine."""
-    disable_log_requests: bool = False
+    disable_log_requests: bool = True
+    enable_legacy_log_requests: bool = False
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser,
```
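The field override relies on standard dataclass inheritance: re-declaring `disable_log_requests` in the subclass changes its default without duplicating it in `dataclasses.fields()`, which is what the `attrs` loop in `from_cli_args` iterates over. A minimal standard-library sketch with toy stand-in classes (not the real vLLM classes):

```python
from dataclasses import dataclass, fields


@dataclass
class EngineArgsSketch:
    disable_log_requests: bool = False


@dataclass
class AsyncEngineArgsSketch(EngineArgsSketch):
    disable_log_requests: bool = True          # overridden default
    enable_legacy_log_requests: bool = False   # opt back into legacy logging


print([f.name for f in fields(AsyncEngineArgsSketch)])
# ['disable_log_requests', 'enable_legacy_log_requests']
print(AsyncEngineArgsSketch().disable_log_requests)  # True
```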
```diff
@@ -1715,6 +1727,9 @@ class AsyncEngineArgs(EngineArgs):
         parser.add_argument('--disable-log-requests',
                             action='store_true',
                             help='Disable logging requests.')
+        parser.add_argument('--enable-legacy-log-requests',
+                            action='store_true',
+                            help='Enable legacy request logging behavior.')
         current_platform.pre_register_and_update(parser)
         return parser
 
```
```diff
@@ -1448,13 +1448,14 @@ async def init_app_state(
     vllm_config: VllmConfig,
     state: State,
     args: Namespace,
+    engine_args: AsyncEngineArgs,
 ) -> None:
     if args.served_model_name is not None:
         served_model_names = args.served_model_name
     else:
         served_model_names = [args.model]
 
-    if args.disable_log_requests:
+    if engine_args.disable_log_requests:
         request_logger = None
     else:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
```
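Passing the resolved `AsyncEngineArgs` into `init_app_state` matters because the raw argparse value and the resolved value now disagree by default: `args.disable_log_requests` stays `False` when the flag is omitted, while `from_cli_args` resolves it to `True`. A short sketch of that divergence (import paths follow the usual vLLM layout and may differ across versions):

```python
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.utils import FlexibleArgumentParser

args = AsyncEngineArgs.add_cli_args(FlexibleArgumentParser()).parse_args([])
engine_args = AsyncEngineArgs.from_cli_args(args)

print(args.disable_log_requests)         # False: raw store_true default
print(engine_args.disable_log_requests)  # True: resolved default after this change
```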
```diff
@@ -1704,12 +1705,16 @@ async def run_server_worker(listen_address,
     if log_config is not None:
        uvicorn_kwargs['log_config'] = log_config
 
-    async with build_async_engine_client(args, client_config) as engine_client:
+    engine_args = AsyncEngineArgs.from_cli_args(args)
+    async with build_async_engine_client_from_engine_args(
+            engine_args, args.disable_frontend_multiprocessing,
+            client_config) as engine_client:
         maybe_register_tokenizer_info_endpoint(args)
         app = build_app(args)
 
         vllm_config = await engine_client.get_vllm_config()
-        await init_app_state(engine_client, vllm_config, app.state, args)
+        await init_app_state(engine_client, vllm_config, app.state, args,
+                             engine_args)
 
         logger.info("Starting vLLM API server %d on %s", server_index,
                     listen_address)
```
```diff
@@ -326,7 +326,7 @@ async def main(args):
         for name in served_model_names
     ]
 
-    if args.disable_log_requests:
+    if engine_args.disable_log_requests:
         request_logger = None
     else:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
```