[Model] Enable encoder DP for MiniCPM-V (#23948)
Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
Signed-off-by: Jiangyun Zhu <riverclouds.zhu@qq.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
@@ -175,7 +175,7 @@ Regardless, you need to set `mm_encoder_tp_mode="data"` in engine arguments to u
 
 Known supported models:
 
 - Llama4 (<gh-pr:18368>)
-- MiniCPM-V-4 (<gh-pr:23327>)
+- MiniCPM-V-2.5 or above (<gh-pr:23327>, <gh-pr:23948>)
 - Qwen2.5-VL (<gh-pr:22742>)
 - Step3 (<gh-pr:22697>)
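For readers landing here from the docs hunk above, here is a minimal, hedged sketch of enabling the feature from the offline `LLM` entrypoint. Only the `mm_encoder_tp_mode="data"` engine argument comes from the documentation being edited; the model name, tensor-parallel size, and `trust_remote_code` setting are illustrative assumptions, not part of this commit.

```python
# Hedged usage sketch: run the vision encoder data-parallel across the
# tensor-parallel ranks. Model name and sizes are placeholders.
from vllm import LLM

llm = LLM(
    model="openbmb/MiniCPM-V-2_6",  # any model from the "known supported" list
    trust_remote_code=True,
    tensor_parallel_size=2,          # ranks over which encoder inputs are split
    mm_encoder_tp_mode="data",       # data-parallel encoder instead of tensor-parallel
)
```

The same engine argument should also be settable when serving (presumably `--mm-encoder-tp-mode data` with `vllm serve`), but this commit only touches the MiniCPM-V model code and its docs entry.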
@@ -977,6 +977,8 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
     instantiated.
     """
 
+    supports_encoder_tp_data = True
+
     @classmethod
     def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
         if modality.startswith("image"):
@@ -990,6 +992,7 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
         config = vllm_config.model_config.hf_config
         multimodal_config = vllm_config.model_config.multimodal_config
         quant_config = vllm_config.quant_config
+        self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
         super().__init__()
         # All MiniCPM-V models disable `tie_word_embeddings` but
         # `PretrainedConfig.tie_word_embeddings` defaults to True; we cannot
@@ -1237,6 +1240,8 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
 
 class MiniCPMV2_0(MiniCPMVBaseModel):
 
+    supports_encoder_tp_data = False
+
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         assert self.version == (2, 0)
@@ -1351,9 +1356,12 @@ class MiniCPMV2_5(MiniCPMVBaseModel, SupportsLoRA):
         quant_config: Optional[QuantizationConfig],
         prefix: str = "",
     ) -> nn.Module:
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
@@ -1441,9 +1449,12 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ) -> nn.Module:
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
@@ -1521,8 +1532,6 @@ class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA):
         ],
     }
 
-    supports_encoder_tp_data = True
-
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__(vllm_config=vllm_config, prefix=prefix)
         assert self.version == (4, 0)
@@ -1546,9 +1555,12 @@ class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA):
         prefix: str = "",
     ) -> nn.Module:
         quant_config = self._maybe_ignore_quant_config(quant_config)
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
@@ -1652,9 +1664,12 @@ class MiniCPMV4_5(MiniCPMVBaseModel, SupportsLoRA):
         prefix: str = "",
     ) -> nn.Module:
         quant_config = self._maybe_ignore_quant_config(quant_config)
-        model = Idefics2VisionTransformer(config.vision_config,
-                                          quant_config=quant_config,
-                                          prefix=prefix)
+        model = Idefics2VisionTransformer(
+            config.vision_config,
+            quant_config=quant_config,
+            prefix=prefix,
+            use_data_parallel=self.use_data_parallel,
+        )
         if self.config.drop_vision_last_layer:
             model.encoder.layers = model.encoder.layers[:-1]
         return model
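Taken together, the hunks above repeat one pattern per MiniCPM-V version class: advertise support through the `supports_encoder_tp_data` class attribute (with MiniCPM-V 2.0 opting out), derive a `use_data_parallel` switch from `mm_encoder_tp_mode`, and forward it into the Idefics2 vision tower. Below is a condensed, hypothetical sketch of that pattern using stand-in classes, not the real vLLM ones.

```python
# Illustrative only: stand-ins for the real classes in
# vllm/model_executor/models/minicpmv.py, showing the opt-in pattern.
from types import SimpleNamespace


class VisionTowerStub:
    """Stand-in for Idefics2VisionTransformer (hypothetical)."""

    def __init__(self, vision_config, *, quant_config=None, prefix="",
                 use_data_parallel=False):
        # With the flag set, the real encoder splits image inputs across
        # ranks instead of sharding its weights tensor-parallel.
        self.use_data_parallel = use_data_parallel


class MiniCPMVLikeBase:
    # Class-level capability flag added to the base model in this commit.
    supports_encoder_tp_data = True

    def __init__(self, multimodal_config):
        # Mirrors the hunk at +992: "data" mode switches encoder DP on.
        self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data"
        self.vpm = self.init_vision_module()

    def init_vision_module(self):
        # Mirrors the repeated Idefics2VisionTransformer(...) changes above:
        # the switch is simply forwarded to the vision tower.
        return VisionTowerStub(vision_config=None,
                               use_data_parallel=self.use_data_parallel)


class MiniCPMV20Like(MiniCPMVLikeBase):
    # Mirrors MiniCPMV2_0, which opts out of encoder DP in this commit.
    supports_encoder_tp_data = False


cfg = SimpleNamespace(mm_encoder_tp_mode="data")
assert MiniCPMVLikeBase(cfg).vpm.use_data_parallel
```

Because the base class now sets `supports_encoder_tp_data = True`, every MiniCPM-V version from 2.5 upward inherits the opt-in, which is why the explicit flag is deleted from `MiniCPMV4_0` in the `-1521,8 +1532,6` hunk.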