mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
Compare commits
1 Commits
ci/build/2
...
7snzwi-cod
Author | SHA1 | Date | |
---|---|---|---|
ef412c4657 |
@ -253,7 +253,7 @@ export https_proxy=http://your.proxy.server:port
|
|||||||
https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
|
https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
|
||||||
|
|
||||||
# or use vllm cmd directly
|
# or use vllm cmd directly
|
||||||
https_proxy=http://your.proxy.server:port vllm serve <model_name> --disable-log-requests
|
https_proxy=http://your.proxy.server:port vllm serve <model_name>
|
||||||
```
|
```
|
||||||
|
|
||||||
- Set the proxy in Python interpreter:
|
- Set the proxy in Python interpreter:
|
||||||
|
@ -290,6 +290,22 @@ def test_prefix_cache_default():
|
|||||||
assert not engine_args.enable_prefix_caching
|
assert not engine_args.enable_prefix_caching
|
||||||
|
|
||||||
|
|
||||||
|
def test_log_request_default_and_legacy_flag(caplog_vllm):
|
||||||
|
parser = AsyncEngineArgs.add_cli_args(FlexibleArgumentParser())
|
||||||
|
|
||||||
|
args = parser.parse_args([])
|
||||||
|
caplog_vllm.clear()
|
||||||
|
engine_args = AsyncEngineArgs.from_cli_args(args=args)
|
||||||
|
assert engine_args.disable_log_requests
|
||||||
|
assert "--disable-log-requests" in caplog_vllm.text
|
||||||
|
|
||||||
|
caplog_vllm.clear()
|
||||||
|
args = parser.parse_args(["--enable-legacy-log-requests"])
|
||||||
|
engine_args = AsyncEngineArgs.from_cli_args(args=args)
|
||||||
|
assert not engine_args.disable_log_requests
|
||||||
|
assert caplog_vllm.text == ""
|
||||||
|
|
||||||
|
|
||||||
# yapf: disable
|
# yapf: disable
|
||||||
@pytest.mark.parametrize(("arg", "expected", "option"), [
|
@pytest.mark.parametrize(("arg", "expected", "option"), [
|
||||||
(None, None, "mm-processor-kwargs"),
|
(None, None, "mm-processor-kwargs"),
|
||||||
|
@ -881,6 +881,17 @@ class EngineArgs:
|
|||||||
attrs = [attr.name for attr in dataclasses.fields(cls)]
|
attrs = [attr.name for attr in dataclasses.fields(cls)]
|
||||||
# Set the attributes from the parsed arguments.
|
# Set the attributes from the parsed arguments.
|
||||||
engine_args = cls(**{attr: getattr(args, attr) for attr in attrs})
|
engine_args = cls(**{attr: getattr(args, attr) for attr in attrs})
|
||||||
|
if isinstance(engine_args, AsyncEngineArgs):
|
||||||
|
if getattr(args, 'disable_log_requests', False):
|
||||||
|
engine_args.disable_log_requests = True
|
||||||
|
elif getattr(args, 'enable_legacy_log_requests', False):
|
||||||
|
engine_args.disable_log_requests = False
|
||||||
|
else:
|
||||||
|
engine_args.disable_log_requests = True
|
||||||
|
logger.warning(
|
||||||
|
"'--disable-log-requests' is now enabled by default. "
|
||||||
|
"Use --enable-legacy-log-requests to restore the previous "
|
||||||
|
"request logging behavior.")
|
||||||
return engine_args
|
return engine_args
|
||||||
|
|
||||||
def create_model_config(self) -> ModelConfig:
|
def create_model_config(self) -> ModelConfig:
|
||||||
@ -1701,7 +1712,8 @@ class EngineArgs:
|
|||||||
@dataclass
|
@dataclass
|
||||||
class AsyncEngineArgs(EngineArgs):
|
class AsyncEngineArgs(EngineArgs):
|
||||||
"""Arguments for asynchronous vLLM engine."""
|
"""Arguments for asynchronous vLLM engine."""
|
||||||
disable_log_requests: bool = False
|
disable_log_requests: bool = True
|
||||||
|
enable_legacy_log_requests: bool = False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_cli_args(parser: FlexibleArgumentParser,
|
def add_cli_args(parser: FlexibleArgumentParser,
|
||||||
@ -1715,6 +1727,9 @@ class AsyncEngineArgs(EngineArgs):
|
|||||||
parser.add_argument('--disable-log-requests',
|
parser.add_argument('--disable-log-requests',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Disable logging requests.')
|
help='Disable logging requests.')
|
||||||
|
parser.add_argument('--enable-legacy-log-requests',
|
||||||
|
action='store_true',
|
||||||
|
help='Enable legacy request logging behavior.')
|
||||||
current_platform.pre_register_and_update(parser)
|
current_platform.pre_register_and_update(parser)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
@ -1448,13 +1448,14 @@ async def init_app_state(
|
|||||||
vllm_config: VllmConfig,
|
vllm_config: VllmConfig,
|
||||||
state: State,
|
state: State,
|
||||||
args: Namespace,
|
args: Namespace,
|
||||||
|
engine_args: AsyncEngineArgs,
|
||||||
) -> None:
|
) -> None:
|
||||||
if args.served_model_name is not None:
|
if args.served_model_name is not None:
|
||||||
served_model_names = args.served_model_name
|
served_model_names = args.served_model_name
|
||||||
else:
|
else:
|
||||||
served_model_names = [args.model]
|
served_model_names = [args.model]
|
||||||
|
|
||||||
if args.disable_log_requests:
|
if engine_args.disable_log_requests:
|
||||||
request_logger = None
|
request_logger = None
|
||||||
else:
|
else:
|
||||||
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
||||||
@ -1704,12 +1705,16 @@ async def run_server_worker(listen_address,
|
|||||||
if log_config is not None:
|
if log_config is not None:
|
||||||
uvicorn_kwargs['log_config'] = log_config
|
uvicorn_kwargs['log_config'] = log_config
|
||||||
|
|
||||||
async with build_async_engine_client(args, client_config) as engine_client:
|
engine_args = AsyncEngineArgs.from_cli_args(args)
|
||||||
|
async with build_async_engine_client_from_engine_args(
|
||||||
|
engine_args, args.disable_frontend_multiprocessing,
|
||||||
|
client_config) as engine_client:
|
||||||
maybe_register_tokenizer_info_endpoint(args)
|
maybe_register_tokenizer_info_endpoint(args)
|
||||||
app = build_app(args)
|
app = build_app(args)
|
||||||
|
|
||||||
vllm_config = await engine_client.get_vllm_config()
|
vllm_config = await engine_client.get_vllm_config()
|
||||||
await init_app_state(engine_client, vllm_config, app.state, args)
|
await init_app_state(engine_client, vllm_config, app.state, args,
|
||||||
|
engine_args)
|
||||||
|
|
||||||
logger.info("Starting vLLM API server %d on %s", server_index,
|
logger.info("Starting vLLM API server %d on %s", server_index,
|
||||||
listen_address)
|
listen_address)
|
||||||
|
@ -326,7 +326,7 @@ async def main(args):
|
|||||||
for name in served_model_names
|
for name in served_model_names
|
||||||
]
|
]
|
||||||
|
|
||||||
if args.disable_log_requests:
|
if engine_args.disable_log_requests:
|
||||||
request_logger = None
|
request_logger = None
|
||||||
else:
|
else:
|
||||||
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
request_logger = RequestLogger(max_log_len=args.max_log_len)
|
||||||
|
Reference in New Issue
Block a user