mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Bugfix] Disable prefix caching by default for benchmark (#18771)
Signed-off-by: cascade812 <cascade812@outlook.com>
This commit is contained in:
@@ -82,7 +82,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
|
||||
parser = EngineArgs.add_cli_args(parser)
|
||||
# V1 enables prefix caching by default which skews the latency
|
||||
# numbers. We need to disable prefix caching by default.
|
||||
-parser.set_defaults(enable_prefix_caching=True)
|
||||
+parser.set_defaults(enable_prefix_caching=False)
|
||||
|
||||
|
||||
def main(args: argparse.Namespace):
|
||||
|
Reference in New Issue
Block a user