mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Benchmarks] Refactor run_structured_output_benchmarks.sh (#17722)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
@ -1,32 +1,98 @@
|
||||
#!/bin/bash

# Default settings for the structured output benchmark.
#
# Every value below can be pre-set through an environment variable of the
# same name; the fallback after `:-` is used only when the variable is unset
# or empty. Positional arguments are deliberately NOT consulted here: the
# script takes `--flag value` options, so `$1` would be a flag name such as
# `--model`, not a model identifier.
MODEL=${MODEL:-"Qwen/Qwen2.5-7B-Instruct"}
BACKEND=${BACKEND:-"vllm"}
DATASET=${DATASET:-"xgrammar_bench"}

# Absolute path of the directory holding this script; used to locate the
# benchmark driver and the default results directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
OUTPUT_DIR=${OUTPUT_DIR:-"$SCRIPT_DIR/structured_output_benchmark_results"}

PORT=${PORT:-8000}
STRUCTURED_OUTPUT_RATIO=${STRUCTURED_OUTPUT_RATIO:-1}
TOTAL_SECONDS=${TOTAL_SECONDS:-90}
MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-300}
TOKENIZER_MODE=${TOKENIZER_MODE:-"auto"}

# Legacy ratio variable, superseded by STRUCTURED_OUTPUT_RATIO. Kept with its
# previous default so any remaining `$GUIDED_RATIO` reference still resolves.
GUIDED_RATIO=${GUIDED_RATIO:-0.5}
|
||||
#######################################
# Print the command-line help text and terminate the script.
# Globals:   MODEL, BACKEND, DATASET, MAX_NEW_TOKENS, OUTPUT_DIR, PORT,
#            STRUCTURED_OUTPUT_RATIO, TOKENIZER_MODE, TOTAL_SECONDS (read,
#            shown as the current defaults)
# Arguments: $1 - optional exit status (default: 0)
# Outputs:   help text on stdout
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  cat <<EOF
Usage: $0 [options]
Options:
 --model MODEL Model to benchmark (default: $MODEL)
 --backend BACKEND Backend to use (default: $BACKEND)
 --dataset DATASET Dataset to use (default: $DATASET)
 --max-new-tokens N Maximum number of tokens to generate (default: $MAX_NEW_TOKENS)
 --output-dir DIR Output directory for results (default: $OUTPUT_DIR)
 --port PORT Port to use (default: $PORT)
 --structured-output-ratio N Ratio of structured outputs (default: $STRUCTURED_OUTPUT_RATIO)
 --tokenizer-mode MODE Tokenizer mode to use (default: $TOKENIZER_MODE)
 --total-seconds N Total seconds to run the benchmark (default: $TOTAL_SECONDS)
 -h, --help Show this help message and exit
EOF
  # Callers that report an error can pass a non-zero status; plain --help
  # keeps exiting successfully.
  exit "${1:-0}"
}
|
||||
|
||||
#######################################
# Report a command-line error, show the help text, and abort.
# Arguments: $1 - message describing the problem
# Outputs:   message and help text on stderr
# Returns:   never returns; exits with status 1
#######################################
_die_with_usage() {
  echo "$1" >&2
  # usage terminates the shell it runs in, so run it in a subshell: that lets
  # the help go to stderr and still leaves us in control of the exit status.
  (usage) >&2
  exit 1
}

#######################################
# Parse command line arguments into the benchmark settings.
# Globals:   MODEL, BACKEND, DATASET, MAX_NEW_TOKENS, OUTPUT_DIR, PORT,
#            STRUCTURED_OUTPUT_RATIO, TOKENIZER_MODE, TOTAL_SECONDS (written
#            when the matching option is given)
# Arguments: the script's command line ("$@")
# Returns:   0 on success; exits 1 on an unknown option or a missing value;
#            exits through usage on -h/--help
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        usage
        ;;
      --model|--backend|--dataset|--max-new-tokens|--output-dir|--port|\
      --structured-output-ratio|--tokenizer-mode|--total-seconds)
        # Without this guard `shift 2` fails when the flag is the last word,
        # nothing is shifted, and the loop would spin forever.
        if [[ $# -lt 2 ]]; then
          _die_with_usage "Missing value for argument: $1"
        fi
        case "$1" in
          --model) MODEL="$2" ;;
          --backend) BACKEND="$2" ;;
          --dataset) DATASET="$2" ;;
          --max-new-tokens) MAX_NEW_TOKENS="$2" ;;
          --output-dir) OUTPUT_DIR="$2" ;;
          --port) PORT="$2" ;;
          --structured-output-ratio) STRUCTURED_OUTPUT_RATIO="$2" ;;
          --tokenizer-mode) TOKENIZER_MODE="$2" ;;
          --total-seconds) TOTAL_SECONDS="$2" ;;
        esac
        shift 2
        ;;
      *)
        _die_with_usage "Unknown argument: $1"
        ;;
    esac
  done
}

parse_args "$@"
# Every argument has been consumed by the parser; clear the positional
# parameters so later code sees the same state as with an inline loop.
set --
|
||||
|
||||
# Create output directory if it doesn't exist
mkdir -p "$OUTPUT_DIR"

# Request rates (queries per second) to benchmark, one run per value.
QPS_VALUES=(25 20 15 10 5 1)

# Options shared by every benchmark invocation. This is kept as a single
# string because it is expanded unquoted (word-split) where the benchmark is
# launched; values containing whitespace are therefore not supported here.
COMMON_PARAMS="--backend $BACKEND \
               --model $MODEL \
               --dataset $DATASET \
               --structured-output-ratio $STRUCTURED_OUTPUT_RATIO \
               --save-results \
               --result-dir $OUTPUT_DIR \
               --output-len $MAX_NEW_TOKENS \
               --port $PORT \
               --tokenizer-mode $TOKENIZER_MODE"

echo "Starting structured output benchmark with model: $MODEL"
echo "Backend: $BACKEND"
|
||||
@ -45,12 +111,15 @@ for qps in "${QPS_VALUES[@]}"; do
|
||||
# Construct filename for this run
|
||||
FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json"
|
||||
|
||||
NUM_PROMPTS=$(echo "$TOTAL_SECONDS * $qps" | bc)
|
||||
NUM_PROMPTS=${NUM_PROMPTS%.*} # Remove fractional part
|
||||
echo "Running benchmark with $NUM_PROMPTS prompts"
|
||||
|
||||
# Run the benchmark
|
||||
python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
|
||||
--request-rate $qps \
|
||||
--result-filename "$FILENAME" \
|
||||
--tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
|
||||
--port ${PORT:-8000}
|
||||
--num-prompts $NUM_PROMPTS
|
||||
|
||||
echo "Completed benchmark with QPS: $qps"
|
||||
echo "----------------------------------------"
|
||||
|
Reference in New Issue
Block a user