mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
Compare commits
8 Commits
d31f7844f8
...
benchmark-
Author | SHA1 | Date | |
---|---|---|---|
a7b809e0f0 | |||
7efc568418 | |||
9ec11b459c | |||
244d5cc749 | |||
816693fd00 | |||
7c16128106 | |||
7bb88b2edc | |||
ae4f3e2aeb |
@ -743,6 +743,8 @@ def main(args: argparse.Namespace):
|
|||||||
]:
|
]:
|
||||||
if field in result_json:
|
if field in result_json:
|
||||||
del result_json[field]
|
del result_json[field]
|
||||||
|
if field in benchmark_result:
|
||||||
|
del benchmark_result[field]
|
||||||
|
|
||||||
# Traffic
|
# Traffic
|
||||||
result_json["request_rate"] = (args.request_rate if args.request_rate
|
result_json["request_rate"] = (args.request_rate if args.request_rate
|
||||||
@ -762,7 +764,10 @@ def main(args: argparse.Namespace):
|
|||||||
file_name = args.result_filename
|
file_name = args.result_filename
|
||||||
if args.result_dir:
|
if args.result_dir:
|
||||||
file_name = os.path.join(args.result_dir, file_name)
|
file_name = os.path.join(args.result_dir, file_name)
|
||||||
with open(file_name, "w", encoding='utf-8') as outfile:
|
with open(file_name, mode="a+", encoding='utf-8') as outfile:
|
||||||
|
# Append a newline.
|
||||||
|
if outfile.tell() != 0:
|
||||||
|
outfile.write("\n")
|
||||||
json.dump(result_json, outfile)
|
json.dump(result_json, outfile)
|
||||||
save_to_pytorch_benchmark_format(args, result_json, file_name)
|
save_to_pytorch_benchmark_format(args, result_json, file_name)
|
||||||
|
|
||||||
|
13
benchmarks/convert_to_csv.py
Normal file
13
benchmarks/convert_to_csv.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
parser = ArgumentParser()
|
||||||
|
parser.add_argument("--input-path", type=str, required=True)
|
||||||
|
parser.add_argument("--output-path", type=str, required=True)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
args = parser.parse_args()
|
||||||
|
df = pd.read_json(args.input_path, lines=True)
|
||||||
|
df.to_csv(args.output_path)
|
27
benchmarks/sweep.sh
Normal file
27
benchmarks/sweep.sh
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||||
|
REQUEST_RATES=(1 10 15 20)
|
||||||
|
INPUT_LEN=1000
|
||||||
|
OUTPUT_LEN=100
|
||||||
|
TOTAL_SECONDS=120
|
||||||
|
|
||||||
|
for REQUEST_RATE in "${REQUEST_RATES[@]}";
|
||||||
|
do
|
||||||
|
NUM_PROMPTS=$(($TOTAL_SECONDS * $REQUEST_RATE))
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "===== RUNNING $MODEL FOR $NUM_PROMPTS PROMPTS WITH $REQUEST_RATE QPS ====="
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
python3 vllm/benchmarks/benchmark_serving.py \
|
||||||
|
--model $MODEL \
|
||||||
|
--dataset-name random \
|
||||||
|
--random-input-len $INPUT_LEN \
|
||||||
|
--random-output-len $OUTPUT_LEN \
|
||||||
|
--request-rate $REQUEST_RATE \
|
||||||
|
--num-prompts $NUM_PROMPTS \
|
||||||
|
--seed $REQUEST_RATE \
|
||||||
|
--ignore-eos \
|
||||||
|
--result-filename "results.json" \
|
||||||
|
--save-result
|
||||||
|
|
||||||
|
done
|
Reference in New Issue
Block a user