[CI Failure] fix models/language/pooling/test_auto_prefix_cache_support.py (#24636)

Signed-off-by: wang.yuqi <noooop@126.com>
2025-10-20 23:03:52 +08:00 · 2025-09-11 18:31:23 +08:00
parent a1213fae5f
commit 25bb9e8c65
1 changed files with 4 additions and 0 deletions
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -3558,6 +3558,10 @@ class VllmConfig:
                    disable_chunked_prefill_reasons.append(
                        "Only \"last\" pooling supports chunked "
                        "prefill and prefix caching; disabling both.")
+                if not getattr(self.model_config.hf_config, "is_causal", True):
+                    disable_chunked_prefill_reasons.append(
+                        "Only models using causal attention supports chunked "
+                        "prefill and prefix caching; disabling both.")
            elif self.model_config.is_encoder_decoder:
                self.scheduler_config.max_num_encoder_input_tokens = \
                    MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(self.model_config)