[Model] Improve Qwen3VLMoeForConditionalGeneration packed_modules_mapping (#27096)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Jee Jee Li
2025-10-17 19:47:00 +08:00
committed by GitHub
parent 6c9fdbf725
commit daec4d2624

View File

@ -350,6 +350,14 @@ class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM):
dummy_inputs=Qwen3VLDummyInputsBuilder, dummy_inputs=Qwen3VLDummyInputsBuilder,
) )
class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration): class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
# Class-level mapping from a packed (fused) module name to the names of the
# separate modules it stands for: "qkv_proj" covers "q_proj", "k_proj" and
# "v_proj". Only the qkv entry is declared here; __init__ unions this dict
# with the language model's own packed_modules_mapping, so whether a
# gate_up_proj entry is present is decided by the language model.
# NOTE(review): consumers of this mapping are not visible in this view;
# confirm which components read it before relying on the merged result.
packed_modules_mapping = {
"qkv_proj": [
"q_proj",
"k_proj",
"v_proj",
],
}
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super(Qwen3VLForConditionalGeneration, self).__init__() super(Qwen3VLForConditionalGeneration, self).__init__()
config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config
@ -376,6 +384,11 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
self.language_model = Qwen3MoeLLMForCausalLM( self.language_model = Qwen3MoeLLMForCausalLM(
vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model") vllm_config=vllm_config, prefix=maybe_prefix(prefix, "language_model")
) )
# Whether to include the gate_up_proj mapping is determined by
# the language model.
self.packed_modules_mapping = (
self.packed_modules_mapping | self.language_model.packed_modules_mapping
)
self.make_empty_intermediate_tensors = ( self.make_empty_intermediate_tensors = (
self.language_model.make_empty_intermediate_tensors self.language_model.make_empty_intermediate_tensors