Disable FlashInfer autotune during kernel warmup

This commit is contained in:
Woosuk Kwon
2025-10-16 16:49:29 +00:00
parent 8935ca208d
commit 69c9a01538

View File

@@ -36,9 +36,9 @@ def kernel_warmup(worker: "Worker"):
max_tokens = worker.scheduler_config.max_num_batched_tokens
deep_gemm_warmup(model, max_tokens)
# FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs
if has_flashinfer() and current_platform.has_device_capability(90):
flashinfer_autotune(worker.model_runner)
# # FlashInfer autotune for Hopper (SM 9.0) and Blackwell (SM 10.0) GPUs
# if has_flashinfer() and current_platform.has_device_capability(90):
# flashinfer_autotune(worker.model_runner)
# FlashInfer attention warmup
# Only warmup if the model has FlashInfer attention groups