[CI] Add genai-perf benchmark in nightly benchmark (#10704)

Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
Kunshang Ji
2025-01-17 12:15:09 +08:00
committed by GitHub
parent ebc73f2828
commit fead53ba78
4 changed files with 196 additions and 4 deletions

View File

@ -301,6 +301,104 @@ run_serving_tests() {
kill_gpu_processes
}
run_genai_perf_tests() {
  # Run the genai-perf benchmark suite against the current serving engine.
  #
  # Globals (read): TEST_SELECTOR, CURRENT_LLM_SERVING_ENGINE, gpu_count,
  #                 VLLM_SOURCE_CODE_LOC
  # Arguments:
  #   $1 - path to a JSON file listing the genai-perf test cases
  # Side effects: launches/kills GPU server processes via sibling helpers;
  #   runs genai-perf, which writes results under ./artifacts.
  local genai_perf_test_file=$1
  local params test_name common_params model tp port num_prompts
  local reuse_server server_params qps_list qps backend new_test_name

  # Process substitution (not a pipeline) so the loop body runs in the
  # current shell: 'break' and variable updates are not lost in a subshell.
  while read -r params; do
    # Get the test name for selection and reporting.
    test_name=$(echo "$params" | jq -r '.test_name')

    # If TEST_SELECTOR is set, only run the test cases that match the selector.
    if [[ -n "$TEST_SELECTOR" ]] && [[ ! "$test_name" =~ $TEST_SELECTOR ]]; then
      echo "Skip test case $test_name."
      continue
    fi

    # Prepend the current serving engine to the test name.
    test_name=${CURRENT_LLM_SERVING_ENGINE}_${test_name}

    # Parameters shared between server launch and client invocation.
    common_params=$(echo "$params" | jq -r '.common_parameters')
    model=$(echo "$common_params" | jq -r '.model')
    tp=$(echo "$common_params" | jq -r '.tp')
    port=$(echo "$common_params" | jq -r '.port')
    num_prompts=$(echo "$common_params" | jq -r '.num_prompts')
    reuse_server=$(echo "$common_params" | jq -r '.reuse_server')

    # Engine-specific server arguments and the QPS values to sweep.
    server_params=$(echo "$params" | jq -r ".${CURRENT_LLM_SERVING_ENGINE}_server_parameters")
    qps_list=$(echo "$params" | jq -r '.qps_list | .[] | @sh')
    echo "Running over qps list $qps_list"

    # Skip if there are not enough GPUs for the requested tensor parallelism.
    if (( gpu_count < tp )); then
      echo "Required num-shard $tp but only $gpu_count GPU found. Skip testcase $test_name."
      continue
    fi

    if [[ "$reuse_server" == "true" ]]; then
      echo "Reuse previous server for test case $test_name"
    else
      kill_gpu_processes
      bash "$VLLM_SOURCE_CODE_LOC/.buildkite/nightly-benchmarks/scripts/launch-server.sh" \
        "$server_params" "$common_params"
    fi

    if wait_for_server; then
      echo ""
      echo "$CURRENT_LLM_SERVING_ENGINE server is up and running."
    else
      echo ""
      echo "$CURRENT_LLM_SERVING_ENGINE failed to start within the timeout period."
      break
    fi

    # Iterate over the different QPS values.
    for qps in $qps_list; do
      # genai-perf has no "inf" request rate; approximate it by issuing all
      # prompts at the same rate as the total prompt count.
      if [[ "$qps" == *"inf"* ]]; then
        echo "qps was $qps"
        qps=$num_prompts
        echo "now qps is $qps"
      fi

      new_test_name=${test_name}_qps_${qps}

      # Normalize engine name; genai-perf currently only knows "vllm".
      backend=$CURRENT_LLM_SERVING_ENGINE
      if [[ "$backend" == *"vllm"* ]]; then
        backend="vllm"
      fi

      # TODO: add output dir; pass "$backend" once genai-perf supports more.
      # Build the command as an array so JSON-sourced values with spaces or
      # shell metacharacters stay intact (the previous eval'd string
      # word-split and re-interpreted its arguments).
      local -a client_command=(
        genai-perf profile
        -m "$model"
        --service-kind openai
        --backend vllm
        --endpoint-type chat
        --streaming
        --url "localhost:$port"
        --request-rate "$qps"
        --num-prompts "$num_prompts"
      )
      echo "Client command: ${client_command[*]}"
      "${client_command[@]}"
      # TODO: process/record outputs (tag with "$new_test_name").
    done
  done < <(jq -c '.[]' "$genai_perf_test_file")

  kill_gpu_processes
}
prepare_dataset() {
@ -328,12 +426,17 @@ main() {
pip install -U transformers
pip install -r requirements-dev.txt
which genai-perf
# check storage
df -h
ensure_installed wget
ensure_installed curl
ensure_installed jq
# genai-perf dependency
ensure_installed libb64-0d
prepare_dataset
@ -345,6 +448,10 @@ main() {
# run the test
run_serving_tests "$BENCHMARK_ROOT/tests/nightly-tests.json"
# run genai-perf tests
run_genai_perf_tests "$BENCHMARK_ROOT/tests/genai-perf-tests.json"
mv artifacts/ $RESULTS_FOLDER/
# upload benchmark results to buildkite
python3 -m pip install tabulate pandas
python3 "$BENCHMARK_ROOT/scripts/summary-nightly-results.py"

View File

@ -0,0 +1,23 @@
[
{
"test_name": "llama8B_tp1_genai_perf",
"qps_list": [4,8,16,32],
"common_parameters": {
"model": "meta-llama/Meta-Llama-3-8B-Instruct",
"tp": 1,
"port": 8000,
"num_prompts": 500,
"reuse_server": false
},
"vllm_server_parameters": {
"disable_log_stats": "",
"disable_log_requests": "",
"gpu_memory_utilization": 0.9,
"num_scheduler_steps": 10,
"max_num_seqs": 512,
"dtype": "bfloat16"
},
"genai_perf_input_parameters": {
}
}
]

View File

@ -29,4 +29,7 @@ lm-eval[api]==0.4.4 # required for model evaluation test
bitsandbytes>=0.45.0
buildkite-test-collector==0.1.9
genai_perf==0.0.8
tritonclient==2.51.0
numpy < 2.0.0

View File

@ -37,7 +37,7 @@ audioread==3.0.1
# via librosa
awscli==1.35.23
# via -r requirements-test.in
bitsandbytes>=0.45.0
bitsandbytes==0.45.0
# via -r requirements-test.in
black==24.10.0
# via datamodel-code-generator
@ -75,6 +75,8 @@ colorama==0.4.6
# tqdm-multiprocess
contourpy==1.3.0
# via matplotlib
cramjam==2.9.0
# via fastparquet
cupy-cuda12x==13.3.0
# via ray
cycler==0.12.1
@ -109,6 +111,8 @@ email-validator==2.2.0
# via pydantic
evaluate==0.4.3
# via lm-eval
fastparquet==2024.11.0
# via genai-perf
fastrlock==0.8.2
# via cupy-cuda12x
filelock==3.16.1
@ -130,8 +134,11 @@ fsspec[http]==2024.9.0
# via
# datasets
# evaluate
# fastparquet
# huggingface-hub
# torch
genai-perf==0.0.8
# via -r requirements-test.in
genson==1.3.0
# via datamodel-code-generator
h11==0.14.0
@ -186,6 +193,8 @@ jsonschema==4.23.0
# ray
jsonschema-specifications==2024.10.1
# via jsonschema
kaleido==0.2.1
# via genai-perf
kiwisolver==1.4.7
# via matplotlib
lazy-loader==0.4
@ -200,6 +209,8 @@ lm-eval[api]==0.4.4
# via -r requirements-test.in
lxml==5.3.0
# via sacrebleu
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via jinja2
matplotlib==3.9.2
@ -209,6 +220,8 @@ mbstrdecoder==1.1.3
# dataproperty
# pytablewriter
# typepy
mdurl==0.1.2
# via markdown-it-py
mistral-common[opencv]==1.5.1
# via
# -r requirements-test.in
@ -249,6 +262,8 @@ numpy==1.26.4
# datasets
# decord
# evaluate
# fastparquet
# genai-perf
# librosa
# matplotlib
# mistral-common
@ -256,15 +271,18 @@ numpy==1.26.4
# numexpr
# opencv-python-headless
# pandas
# patsy
# peft
# rouge-score
# sacrebleu
# scikit-learn
# scipy
# soxr
# statsmodels
# tensorizer
# torchvision
# transformers
# tritonclient
nvidia-cublas-cu12==12.4.5.8
# via
# nvidia-cudnn-cu12
@ -306,30 +324,39 @@ packaging==24.1
# datamodel-code-generator
# datasets
# evaluate
# fastparquet
# huggingface-hub
# lazy-loader
# matplotlib
# peft
# plotly
# pooch
# pytest
# pytest-rerunfailures
# ray
# statsmodels
# transformers
# typepy
pandas==2.2.3
# via
# datasets
# evaluate
# fastparquet
# genai-perf
# statsmodels
pathspec==0.12.1
# via black
pathvalidate==3.2.1
# via pytablewriter
patsy==1.0.1
# via statsmodels
peft==0.13.2
# via
# -r requirements-test.in
# lm-eval
pillow==10.4.0
# via
# genai-perf
# matplotlib
# mistral-common
# sentence-transformers
@ -338,6 +365,8 @@ platformdirs==4.3.6
# via
# black
# pooch
plotly==5.24.1
# via genai-perf
pluggy==1.5.0
# via pytest
pooch==1.8.2
@ -360,7 +389,9 @@ psutil==6.1.0
py==1.11.0
# via pytest-forked
pyarrow==18.0.0
# via datasets
# via
# datasets
# genai-perf
pyasn1==0.6.1
# via rsa
pybind11==2.13.6
@ -373,6 +404,8 @@ pydantic[email]==2.9.2
# mistral-common
pydantic-core==2.23.4
# via pydantic
pygments==2.18.0
# via rich
pyparsing==3.2.0
# via matplotlib
pytablewriter==1.2.0
@ -381,14 +414,18 @@ pytest==8.3.3
# via
# -r requirements-test.in
# buildkite-test-collector
# genai-perf
# pytest-asyncio
# pytest-forked
# pytest-mock
# pytest-rerunfailures
# pytest-shard
pytest-asyncio==0.24.0
# via -r requirements-test.in
pytest-forked==1.6.0
# via -r requirements-test.in
pytest-mock==3.14.0
# via genai-perf
pytest-rerunfailures==14.0
# via -r requirements-test.in
pytest-shard==0.1.2
@ -399,6 +436,8 @@ python-dateutil==2.9.0.post0
# matplotlib
# pandas
# typepy
python-rapidjson==1.20
# via tritonclient
pytz==2024.2
# via
# pandas
@ -409,9 +448,11 @@ pyyaml==6.0.2
# awscli
# datamodel-code-generator
# datasets
# genai-perf
# huggingface-hub
# peft
# ray
# responses
# timm
# transformers
ray[adag]==2.40.0
@ -438,8 +479,13 @@ requests==2.32.3
# mistral-common
# pooch
# ray
# responses
# tiktoken
# transformers
responses==0.25.3
# via genai-perf
rich==13.9.4
# via genai-perf
rouge-score==0.1.2
# via lm-eval
rpds-py==0.20.1
@ -470,6 +516,7 @@ scipy==1.13.1
# librosa
# scikit-learn
# sentence-transformers
# statsmodels
sentence-transformers==3.2.1
# via -r requirements-test.in
sentencepiece==0.2.0
@ -490,6 +537,8 @@ soxr==0.5.0.post1
# via librosa
sqlitedict==2.1.0
# via lm-eval
statsmodels==0.14.4
# via genai-perf
sympy==1.13.1
# via torch
tabledata==1.3.3
@ -499,7 +548,9 @@ tabulate==0.9.0
tcolorpy==0.1.6
# via pytablewriter
tenacity==9.0.0
# via lm-eval
# via
# lm-eval
# plotly
tensorizer==2.9.0
# via -r requirements-test.in
threadpoolctl==3.5.0
@ -540,6 +591,7 @@ tqdm-multiprocess==0.0.11
# via lm-eval
transformers==4.47.0
# via
# genai-perf
# lm-eval
# peft
# sentence-transformers
@ -548,6 +600,10 @@ transformers-stream-generator==0.0.5
# via -r requirements-test.in
triton==3.1.0
# via torch
tritonclient==2.51.0
# via
# -r requirements-test.in
# genai-perf
typepy[datetime]==1.3.2
# via
# dataproperty
@ -555,6 +611,7 @@ typepy[datetime]==1.3.2
# tabledata
typing-extensions==4.12.2
# via
# bitsandbytes
# huggingface-hub
# librosa
# mistral-common
@ -563,10 +620,12 @@ typing-extensions==4.12.2
# torch
tzdata==2024.2
# via pandas
urllib3==1.26.20
urllib3==2.2.3
# via
# botocore
# requests
# responses
# tritonclient
word2number==1.1
# via lm-eval
xxhash==3.5.0