mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
Compare commits
8 Commits
v0.10.1.1
...
benchmark-
Author | SHA1 | Date | |
---|---|---|---|
a7b809e0f0 | |||
7efc568418 | |||
9ec11b459c | |||
244d5cc749 | |||
816693fd00 | |||
7c16128106 | |||
7bb88b2edc | |||
ae4f3e2aeb |
@ -743,6 +743,8 @@ def main(args: argparse.Namespace):
|
||||
]:
|
||||
if field in result_json:
|
||||
del result_json[field]
|
||||
if field in benchmark_result:
|
||||
del benchmark_result[field]
|
||||
|
||||
# Traffic
|
||||
result_json["request_rate"] = (args.request_rate if args.request_rate
|
||||
@ -762,7 +764,10 @@ def main(args: argparse.Namespace):
|
||||
file_name = args.result_filename
|
||||
if args.result_dir:
|
||||
file_name = os.path.join(args.result_dir, file_name)
|
||||
with open(file_name, "w", encoding='utf-8') as outfile:
|
||||
with open(file_name, mode="a+", encoding='utf-8') as outfile:
|
||||
# Append a newline.
|
||||
if outfile.tell() != 0:
|
||||
outfile.write("\n")
|
||||
json.dump(result_json, outfile)
|
||||
save_to_pytorch_benchmark_format(args, result_json, file_name)
|
||||
|
||||
|
13
benchmarks/convert_to_csv.py
Normal file
13
benchmarks/convert_to_csv.py
Normal file
@ -0,0 +1,13 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
"""Convert a newline-delimited JSON benchmark results file to CSV."""
from argparse import ArgumentParser

import pandas as pd

# Command-line interface: one JSON-lines input file, one CSV output file.
parser = ArgumentParser()
parser.add_argument("--input-path", type=str, required=True)
parser.add_argument("--output-path", type=str, required=True)


def _convert(input_path: str, output_path: str) -> None:
    """Load JSON-lines records into a DataFrame and write them out as CSV."""
    frame = pd.read_json(input_path, lines=True)
    frame.to_csv(output_path)


if __name__ == "__main__":
    cli_args = parser.parse_args()
    _convert(cli_args.input_path, cli_args.output_path)
|
27
benchmarks/sweep.sh
Normal file
27
benchmarks/sweep.sh
Normal file
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
# Sweep the vLLM serving benchmark over several request rates with a fixed
# random-token workload, appending each run's result to results.json.

MODEL=meta-llama/Llama-3.1-8B-Instruct
REQUEST_RATES=(1 10 15 20)
INPUT_LEN=1000
OUTPUT_LEN=100
TOTAL_SECONDS=120

for REQUEST_RATE in "${REQUEST_RATES[@]}";
do
    # Scale the prompt count so each run lasts roughly TOTAL_SECONDS at
    # the target QPS.
    NUM_PROMPTS=$((TOTAL_SECONDS * REQUEST_RATE))

    echo ""
    echo "===== RUNNING $MODEL FOR $NUM_PROMPTS PROMPTS WITH $REQUEST_RATE QPS ====="
    echo ""

    # Quote all expansions so values with spaces/globs cannot word-split.
    # --seed varies with the rate so each sweep point samples fresh prompts.
    python3 vllm/benchmarks/benchmark_serving.py \
        --model "$MODEL" \
        --dataset-name random \
        --random-input-len "$INPUT_LEN" \
        --random-output-len "$OUTPUT_LEN" \
        --request-rate "$REQUEST_RATE" \
        --num-prompts "$NUM_PROMPTS" \
        --seed "$REQUEST_RATE" \
        --ignore-eos \
        --result-filename "results.json" \
        --save-result

done
|
Reference in New Issue
Block a user