[Minor] Fix benchmark_latency script (#2765)

This commit is contained in:
Woosuk Kwon
2024-02-05 12:45:37 -08:00
committed by GitHub
parent c9b45adeeb
commit 72d3a30c63

View File

@@ -37,7 +37,10 @@ def main(args: argparse.Namespace):
max_tokens=args.output_len, max_tokens=args.output_len,
) )
print(sampling_params) print(sampling_params)
dummy_prompt_token_ids = [[0] * args.input_len] * args.batch_size dummy_prompt_token_ids = np.random.randint(10000,
size=(args.batch_size,
args.input_len))
dummy_prompt_token_ids = dummy_prompt_token_ids.tolist()
def run_to_completion(profile_dir: Optional[str] = None): def run_to_completion(profile_dir: Optional[str] = None):
if profile_dir: if profile_dir:
@@ -71,7 +74,7 @@ def main(args: argparse.Namespace):
"." "."
) / "vllm_benchmark_result" / f"latency_result_{time.time()}" ) / "vllm_benchmark_result" / f"latency_result_{time.time()}"
print(f"Profiling (results will be saved to '{profile_dir}')...") print(f"Profiling (results will be saved to '{profile_dir}')...")
run_to_completion(profile_dir=args.profile_result_dir) run_to_completion(profile_dir=profile_dir)
return return
# Benchmark. # Benchmark.