From 25bb9e8c65424e3bf24d2eab259743f9a97b7a3c Mon Sep 17 00:00:00 2001
From: "wang.yuqi"
Date: Thu, 11 Sep 2025 18:31:23 +0800
Subject: [PATCH] [CI Failure] fix
 models/language/pooling/test_auto_prefix_cache_support.py (#24636)

Signed-off-by: wang.yuqi
---
 vllm/config/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 587cfab355..2ead8f5a37 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -3558,6 +3558,10 @@ class VllmConfig:
             disable_chunked_prefill_reasons.append(
                 "Only \"last\" pooling supports chunked "
                 "prefill and prefix caching; disabling both.")
+            if not getattr(self.model_config.hf_config, "is_causal", True):
+                disable_chunked_prefill_reasons.append(
+                    "Only models using causal attention support chunked "
+                    "prefill and prefix caching; disabling both.")
         elif self.model_config.is_encoder_decoder:
             self.scheduler_config.max_num_encoder_input_tokens = \
                 MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(self.model_config)
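
For reviewers, below is a minimal, self-contained sketch of the gating logic this
hunk adds. FakeHFConfig and chunked_prefill_disable_reasons are hypothetical
stand-ins, not vLLM APIs; the point is that getattr defaults is_causal to True,
so only models that explicitly declare is_causal=False get chunked prefill and
prefix caching disabled.

class FakeHFConfig:
    # Stand-in for a HuggingFace model config; carries arbitrary attributes.
    def __init__(self, **kwargs):
        for name, value in kwargs.items():
            setattr(self, name, value)

def chunked_prefill_disable_reasons(hf_config) -> list[str]:
    reasons = []
    # Mirrors the patched check: models that never set is_causal are
    # treated as causal and keep chunked prefill / prefix caching enabled.
    if not getattr(hf_config, "is_causal", True):
        reasons.append(
            "Only models using causal attention support chunked "
            "prefill and prefix caching; disabling both.")
    return reasons

assert chunked_prefill_disable_reasons(FakeHFConfig()) == []
assert chunked_prefill_disable_reasons(FakeHFConfig(is_causal=True)) == []
assert len(chunked_prefill_disable_reasons(FakeHFConfig(is_causal=False))) == 1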