Disable FlashInfer autotune during kernel warmup

2025-10-20 14:53:52 +08:00 · 2025-10-16 16:49:29 +00:00
parent 8935ca208d
commit 69c9a01538
1 changed file with 3 additions and 3 deletions
--- a/vllm/model_executor/warmup/kernel_warmup.py
+++ b/vllm/model_executor/warmup/kernel_warmup.py
@@ -36,9 +36,9 @@ def kernel_warmup(worker: "Worker"):
        max_tokens = worker.scheduler_config.max_num_batched_tokens
        deep_gemm_warmup(model, max_tokens)

-    # FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs
-    if has_flashinfer() and current_platform.has_device_capability(90):
-        flashinfer_autotune(worker.model_runner)
+    # # FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs
+    # if has_flashinfer() and current_platform.has_device_capability(90):
+    #     flashinfer_autotune(worker.model_runner)

    # FlashInfer attention warmup
    # Only warmup if the model has FlashInfer attention groups