[Model] Improve Qwen3VLMoeForConditionalGeneration packed_modules_mapping (#27096)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
2025-10-20 23:03:52 +08:00 · 2025-10-17 19:47:00 +08:00
parent 6c9fdbf725
commit daec4d2624
1 changed files with 13 additions and 0 deletions
--- a/vllm/model_executor/models/qwen3_vl_moe.py
+++ b/vllm/model_executor/models/qwen3_vl_moe.py
@ -350,6 +350,14 @@ class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM):
    dummy_inputs=Qwen3VLDummyInputsBuilder,
 )
 class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
    packed_modules_mapping = {
        "qkv_proj": [
            "q_proj",
            "k_proj",
            "v_proj",
        ],
    }
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super(Qwen3VLForConditionalGeneration, self).__init__()
        config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config
@ -376,6 +384,11 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
        self.language_model = Qwen3MoeLLMForCausalLM(
            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
        )
        # The language model decides whether a gate_up_proj mapping exists,
        # so merge its packed_modules_mapping into this model's mapping.
        self.packed_modules_mapping = (
            self.packed_modules_mapping | self.language_model.packed_modules_mapping
        )
        self.make_empty_intermediate_tensors = (
            self.language_model.make_empty_intermediate_tensors