add model_type deepseek_v32 support (#3304)

### What this PR does / why we need it?
Add model_type deepseek_v32 support with proper model registration and
AutoConfig support

### Does this PR introduce _any_ user-facing change?
No.


### How was this patch tested?
Image was built and fully tested with various sequence lengths.

Co-authored-by: ken <ken.zhang1@huawei.com>
This commit is contained in:
kkrazy
2025-09-30 21:53:37 +08:00
committed by GitHub
parent 9dd8621212
commit bba4c82ce8
3 changed files with 52 additions and 4 deletions

View File

@ -11,13 +11,13 @@ def patch_pangu():
if not hasattr(self.hf_text_config, "model_type"):
return False
elif self.hf_text_config.model_type in \
('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'pangu_ultra_moe'):
('deepseek_v2', 'deepseek_v3', 'deepseek_v32', 'deepseek_mtp', 'pangu_ultra_moe'):
return kv_lora_dim is not None
elif self.hf_text_config.model_type == 'eagle':
# if the model is an EAGLE module, check for the
# underlying architecture
return self.hf_text_config.model.model_type in \
('deepseek_v2', 'deepseek_v3', 'pangu_ultra_moe') \
('deepseek_v2', 'deepseek_v3', 'deepseek_v32', 'pangu_ultra_moe') \
and kv_lora_dim is not None
return False
@ -60,7 +60,7 @@ def patch_pangu():
@staticmethod
def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
if hf_config.model_type == "deepseek_v3":
if hf_config.model_type in ["deepseek_v3", "deepseek_v32"]:
hf_config.model_type = "deepseek_mtp"
if hf_config.model_type == "deepseek_mtp":
n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
@ -105,7 +105,7 @@ def patch_pangu():
# mtp acceleration for more models besides deepseek_v3
if self.target_model_config and \
(self.target_model_config.hf_text_config.model_type \
== "deepseek_v3" or
in ["deepseek_v3", "deepseek_v32"] or
self.target_model_config.hf_text_config.model_type \
== "mimo" or
self.target_model_config.hf_text_config.model_type \

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
from transformers import AutoConfig
from omni.adaptors.vllm.patches import model_patch
from vllm import ModelRegistry
import os
@ -10,6 +11,10 @@ if os.getenv("PROFILING_NAMELIST", None):
print("<<<Profiler patch environmental variable is enabled, applying profiler patches.")
from omni.adaptors.vllm.patches.profiler_patches import apply_profiler_patches
from omni.models.deepseek.deepseek_v32 import DeepseekV32Config
AutoConfig.register("deepseek_v32", DeepseekV32Config)
def register_model():
is_A2 = os.getenv("ASCEND_PLATFORM", "A3")=="A2"
@ -27,6 +32,9 @@ def register_model():
ModelRegistry.register_model(
"DeepseekV3ForCausalLM",
"omni.models.deepseek.deepseek_v3:DeepseekV3ForCausalLM")
ModelRegistry.register_model(
"DeepseekV32ForCausalLM",
"omni.models.deepseek.deepseek_v32:DeepseekV32ForCausalLM")
ModelRegistry.register_model(
"PanguUltraMoEForCausalLM",
"omni.models.pangu.pangu_ultra_moe:PanguUltraMoEForCausalLM")

View File

@ -0,0 +1,40 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team.
# Copyright 2023 DeepSeek-AI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from transformers import DeepseekV3Config
from vllm.compilation.decorators import support_torch_compile
from .deepseek_v3 import DeepseekV3ForCausalLM
class DeepseekV32Config(DeepseekV3Config):
    """Configuration for DeepSeek-V3.2 checkpoints.

    Behaviorally identical to ``DeepseekV3Config``; it exists only to carry
    the distinct ``model_type`` string so ``AutoConfig.register("deepseek_v32",
    DeepseekV32Config)`` can dispatch v3.2 checkpoints to the right class.
    All constructor keyword arguments are handled by the base class, so no
    ``__init__`` override is needed (the previous one merely forwarded
    ``**kwargs`` to ``super().__init__`` — a no-op override, removed).
    """

    # The model_type string AutoConfig keys on for this architecture.
    model_type = "deepseek_v32"
    # Past key/value caches are runtime state, not serializable inference inputs.
    keys_to_ignore_at_inference = ["past_key_values"]
@support_torch_compile
class DeepseekV32ForCausalLM(DeepseekV3ForCausalLM):
    # DeepSeek-V3.2 reuses the V3 causal-LM implementation unchanged; the only
    # difference is the config's model_type (see DeepseekV32Config). The
    # @support_torch_compile decorator opts this class into vLLM's
    # torch.compile integration, same as the base class presumably receives.
    pass