Compare commits

...

1 Commit

Author SHA1 Message Date
ef412c4657 [Frontend] change default log request behavior 2025-07-17 14:51:19 -07:00
5 changed files with 42 additions and 6 deletions

View File

@@ -253,7 +253,7 @@ export https_proxy=http://your.proxy.server:port
https_proxy=http://your.proxy.server:port huggingface-cli download <model_name> https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
# or use vllm cmd directly # or use vllm cmd directly
https_proxy=http://your.proxy.server:port vllm serve <model_name> --disable-log-requests https_proxy=http://your.proxy.server:port vllm serve <model_name>
``` ```
- Set the proxy in Python interpreter: - Set the proxy in Python interpreter:

View File

@@ -290,6 +290,22 @@ def test_prefix_cache_default():
assert not engine_args.enable_prefix_caching assert not engine_args.enable_prefix_caching
def test_log_request_default_and_legacy_flag(caplog_vllm):
    """Check the request-logging default and the legacy opt-in flag.

    Two command lines are parsed with the same parser:

    * no flags: ``disable_log_requests`` must be truthy and the captured
      log text must mention ``--disable-log-requests``;
    * ``--enable-legacy-log-requests``: ``disable_log_requests`` must be
      falsy and the captured log text must be empty.
    """
    cli_parser = AsyncEngineArgs.add_cli_args(FlexibleArgumentParser())

    # Case 1: bare command line. Clear the capture right before building the
    # engine args so only messages emitted by from_cli_args are inspected.
    default_namespace = cli_parser.parse_args([])
    caplog_vllm.clear()
    default_engine_args = AsyncEngineArgs.from_cli_args(args=default_namespace)
    assert default_engine_args.disable_log_requests
    assert "--disable-log-requests" in caplog_vllm.text

    # Case 2: explicit legacy opt-in. Reset the capture first so the message
    # from case 1 cannot leak into the emptiness check below.
    caplog_vllm.clear()
    legacy_namespace = cli_parser.parse_args(["--enable-legacy-log-requests"])
    legacy_engine_args = AsyncEngineArgs.from_cli_args(args=legacy_namespace)
    assert not legacy_engine_args.disable_log_requests
    assert caplog_vllm.text == ""
# yapf: disable # yapf: disable
@pytest.mark.parametrize(("arg", "expected", "option"), [ @pytest.mark.parametrize(("arg", "expected", "option"), [
(None, None, "mm-processor-kwargs"), (None, None, "mm-processor-kwargs"),

View File

@@ -881,6 +881,17 @@ class EngineArgs:
attrs = [attr.name for attr in dataclasses.fields(cls)] attrs = [attr.name for attr in dataclasses.fields(cls)]
# Set the attributes from the parsed arguments. # Set the attributes from the parsed arguments.
engine_args = cls(**{attr: getattr(args, attr) for attr in attrs}) engine_args = cls(**{attr: getattr(args, attr) for attr in attrs})
if isinstance(engine_args, AsyncEngineArgs):
if getattr(args, 'disable_log_requests', False):
engine_args.disable_log_requests = True
elif getattr(args, 'enable_legacy_log_requests', False):
engine_args.disable_log_requests = False
else:
engine_args.disable_log_requests = True
logger.warning(
"'--disable-log-requests' is now enabled by default. "
"Use --enable-legacy-log-requests to restore the previous "
"request logging behavior.")
return engine_args return engine_args
def create_model_config(self) -> ModelConfig: def create_model_config(self) -> ModelConfig:
@@ -1701,7 +1712,8 @@ class EngineArgs:
@dataclass @dataclass
class AsyncEngineArgs(EngineArgs): class AsyncEngineArgs(EngineArgs):
"""Arguments for asynchronous vLLM engine.""" """Arguments for asynchronous vLLM engine."""
disable_log_requests: bool = False disable_log_requests: bool = True
enable_legacy_log_requests: bool = False
@staticmethod @staticmethod
def add_cli_args(parser: FlexibleArgumentParser, def add_cli_args(parser: FlexibleArgumentParser,
@@ -1715,6 +1727,9 @@ class AsyncEngineArgs(EngineArgs):
parser.add_argument('--disable-log-requests', parser.add_argument('--disable-log-requests',
action='store_true', action='store_true',
help='Disable logging requests.') help='Disable logging requests.')
parser.add_argument('--enable-legacy-log-requests',
action='store_true',
help='Enable legacy request logging behavior.')
current_platform.pre_register_and_update(parser) current_platform.pre_register_and_update(parser)
return parser return parser

View File

@@ -1448,13 +1448,14 @@ async def init_app_state(
vllm_config: VllmConfig, vllm_config: VllmConfig,
state: State, state: State,
args: Namespace, args: Namespace,
engine_args: AsyncEngineArgs,
) -> None: ) -> None:
if args.served_model_name is not None: if args.served_model_name is not None:
served_model_names = args.served_model_name served_model_names = args.served_model_name
else: else:
served_model_names = [args.model] served_model_names = [args.model]
if args.disable_log_requests: if engine_args.disable_log_requests:
request_logger = None request_logger = None
else: else:
request_logger = RequestLogger(max_log_len=args.max_log_len) request_logger = RequestLogger(max_log_len=args.max_log_len)
@@ -1704,12 +1705,16 @@ async def run_server_worker(listen_address,
if log_config is not None: if log_config is not None:
uvicorn_kwargs['log_config'] = log_config uvicorn_kwargs['log_config'] = log_config
async with build_async_engine_client(args, client_config) as engine_client: engine_args = AsyncEngineArgs.from_cli_args(args)
async with build_async_engine_client_from_engine_args(
engine_args, args.disable_frontend_multiprocessing,
client_config) as engine_client:
maybe_register_tokenizer_info_endpoint(args) maybe_register_tokenizer_info_endpoint(args)
app = build_app(args) app = build_app(args)
vllm_config = await engine_client.get_vllm_config() vllm_config = await engine_client.get_vllm_config()
await init_app_state(engine_client, vllm_config, app.state, args) await init_app_state(engine_client, vllm_config, app.state, args,
engine_args)
logger.info("Starting vLLM API server %d on %s", server_index, logger.info("Starting vLLM API server %d on %s", server_index,
listen_address) listen_address)

View File

@@ -326,7 +326,7 @@ async def main(args):
for name in served_model_names for name in served_model_names
] ]
if args.disable_log_requests: if engine_args.disable_log_requests:
request_logger = None request_logger = None
else: else:
request_logger = RequestLogger(max_log_len=args.max_log_len) request_logger = RequestLogger(max_log_len=args.max_log_len)