disable flashinfer warmup

This commit is contained in:
Woosuk Kwon
2025-10-16 16:49:29 +00:00
parent 8935ca208d
commit 69c9a01538

View File

@@ -36,9 +36,9 @@ def kernel_warmup(worker: "Worker"):
max_tokens = worker.scheduler_config.max_num_batched_tokens max_tokens = worker.scheduler_config.max_num_batched_tokens
deep_gemm_warmup(model, max_tokens) deep_gemm_warmup(model, max_tokens)
# FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs # # FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs
if has_flashinfer() and current_platform.has_device_capability(90): # if has_flashinfer() and current_platform.has_device_capability(90):
flashinfer_autotune(worker.model_runner) # flashinfer_autotune(worker.model_runner)
# FlashInfer attention warmup # FlashInfer attention warmup
# Only warmup if the model has FlashInfer attention groups # Only warmup if the model has FlashInfer attention groups