[misc] Do not allow LoRA to be used with chunked prefill. (#5538)

Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
Author: SangBin Cho
Date: 2024-06-15 23:59:36 +09:00
Committed by: GitHub
Parent: 81fbb3655f
Commit: e691918e3b


@@ -1092,6 +1092,8 @@ class LoRAConfig:
                 "Due to limitations of the custom LoRA CUDA kernel, "
                 "max_num_batched_tokens must be <= 65528 when "
                 "LoRA is enabled.")
+        if scheduler_config.chunked_prefill_enabled:
+            raise ValueError("LoRA is not supported with chunked prefill yet.")
 
 
 @dataclass
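
For reference, below is a minimal, self-contained sketch of the validation pattern this hunk extends. Only the two error messages and the attribute names max_num_batched_tokens and chunked_prefill_enabled come from the diff itself; the method name verify_with_scheduler_config, the SchedulerConfig fields, and the placeholder LoRAConfig field are assumptions for illustration, not the full vLLM definitions.

from dataclasses import dataclass


@dataclass
class SchedulerConfig:
    # Only the two fields referenced by the checks in the hunk; the real
    # config has many more (assumption for this sketch).
    max_num_batched_tokens: int = 2048
    chunked_prefill_enabled: bool = False


@dataclass
class LoRAConfig:
    max_lora_rank: int = 16  # illustrative placeholder field

    def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
        # Existing kernel limit check (context lines of the hunk).
        if scheduler_config.max_num_batched_tokens > 65528:
            raise ValueError(
                "Due to limitations of the custom LoRA CUDA kernel, "
                "max_num_batched_tokens must be <= 65528 when "
                "LoRA is enabled.")
        # New guard added by this commit: reject chunked prefill outright.
        if scheduler_config.chunked_prefill_enabled:
            raise ValueError(
                "LoRA is not supported with chunked prefill yet.")


# Requesting both features now fails fast at config-verification time.
try:
    LoRAConfig().verify_with_scheduler_config(
        SchedulerConfig(chunked_prefill_enabled=True))
except ValueError as err:
    print(err)  # -> LoRA is not supported with chunked prefill yet.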