Improve flexibility of auto_tune.sh execution. (#23766)

Signed-off-by: Anthony Su <50185138+anthonsu@users.noreply.github.com>
Signed-off-by: anthonsu <50185138+anthonsu@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
anthonsu
2025-09-04 02:41:41 -07:00
committed by GitHub
parent 51d5e9be7d
commit 04f3c35cff
2 changed files with 36 additions and 14 deletions

View File

@ -31,6 +31,12 @@ cd vllm
You must set the following variables at the top of the script before execution.
Note: You can also override the default values below via environment variables when running the script.
```bash
MODEL=meta-llama/Llama-3.3-70B-Instruct SYSTEM=TPU TP=8 DOWNLOAD_DIR='' INPUT_LEN=128 OUTPUT_LEN=2048 MAX_MODEL_LEN=2300 MIN_CACHE_HIT_PCT=0 MAX_LATENCY_ALLOWED_MS=100000000000 NUM_SEQS_LIST="128 256" NUM_BATCHED_TOKENS_LIST="1024 2048 4096" VLLM_LOGGING_LEVEL=DEBUG bash auto_tune.sh
```
| Variable | Description | Example Value |
| --- | --- | --- |
| `BASE` | **Required.** The absolute path to the parent directory of your vLLM repository directory. | `"$HOME"` |

View File

@ -5,25 +5,41 @@
TAG=$(date +"%Y_%m_%d_%H_%M")
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
BASE="$SCRIPT_DIR/../../.."
MODEL="meta-llama/Llama-3.1-8B-Instruct"
SYSTEM="TPU"
TP=1
DOWNLOAD_DIR=""
INPUT_LEN=4000
OUTPUT_LEN=16
MAX_MODEL_LEN=4096
MIN_CACHE_HIT_PCT=0
MAX_LATENCY_ALLOWED_MS=100000000000
NUM_SEQS_LIST="128 256"
NUM_BATCHED_TOKENS_LIST="512 1024 2048 4096"
VLLM_LOGGING_LEVEL=${VLLM_LOGGING_LEVEL:-INFO}
BASE=${BASE:-"$SCRIPT_DIR/../../.."}
MODEL=${MODEL:-"meta-llama/Llama-3.1-8B-Instruct"}
SYSTEM=${SYSTEM:-"TPU"}
TP=${TP:-1}
DOWNLOAD_DIR=${DOWNLOAD_DIR:-""}
INPUT_LEN=${INPUT_LEN:-4000}
OUTPUT_LEN=${OUTPUT_LEN:-16}
MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
MIN_CACHE_HIT_PCT=${MIN_CACHE_HIT_PCT:-0}
MAX_LATENCY_ALLOWED_MS=${MAX_LATENCY_ALLOWED_MS:-100000000000}
NUM_SEQS_LIST=${NUM_SEQS_LIST:-"128 256"}
NUM_BATCHED_TOKENS_LIST=${NUM_BATCHED_TOKENS_LIST:-"512 1024 2048 4096"}
LOG_FOLDER="$BASE/auto-benchmark/$TAG"
RESULT="$LOG_FOLDER/result.txt"
PROFILE_PATH="$LOG_FOLDER/profile"
echo "result file: $RESULT"
echo "model: $MODEL"
echo "====================== AUTO TUNE PARAMETERS ===================="
echo "SCRIPT_DIR=$SCRIPT_DIR"
echo "BASE=$BASE"
echo "MODEL=$MODEL"
echo "SYSTEM=$SYSTEM"
echo "TP=$TP"
echo "DOWNLOAD_DIR=$DOWNLOAD_DIR"
echo "INPUT_LEN=$INPUT_LEN"
echo "OUTPUT_LEN=$OUTPUT_LEN"
echo "MAX_MODEL_LEN=$MAX_MODEL_LEN"
echo "MIN_CACHE_HIT_PCT=$MIN_CACHE_HIT_PCT"
echo "MAX_LATENCY_ALLOWED_MS=$MAX_LATENCY_ALLOWED_MS"
echo "NUM_SEQS_LIST=$NUM_SEQS_LIST"
echo "NUM_BATCHED_TOKENS_LIST=$NUM_BATCHED_TOKENS_LIST"
echo "VLLM_LOGGING_LEVEL=$VLLM_LOGGING_LEVEL"
echo "RESULT_FILE=$RESULT"
echo "====================== AUTO TUNEPARAMETERS ===================="
rm -rf $LOG_FOLDER
rm -rf $PROFILE_PATH