mirror of
https://github.com/vllm-project/vllm-ascend.git
synced 2025-10-20 13:43:53 +08:00
add model_type deepseek_v32 support (#3304)
### What this PR does / why we need it?
Add model_type deepseek_v32 support with proper model registration and AutoConfig support.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Image was built and fully tested with various seq lengths.

Co-authored-by: ken <ken.zhang1@huawei.com>
This commit is contained in:
@ -11,13 +11,13 @@ def patch_pangu():
|
||||
if not hasattr(self.hf_text_config, "model_type"):
|
||||
return False
|
||||
elif self.hf_text_config.model_type in \
|
||||
('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'pangu_ultra_moe'):
|
||||
('deepseek_v2', 'deepseek_v3', 'deepseek_v32', 'deepseek_mtp', 'pangu_ultra_moe'):
|
||||
return kv_lora_dim is not None
|
||||
elif self.hf_text_config.model_type == 'eagle':
|
||||
# if the model is an EAGLE module, check for the
|
||||
# underlying architecture
|
||||
return self.hf_text_config.model.model_type in \
|
||||
('deepseek_v2', 'deepseek_v3', 'pangu_ultra_moe') \
|
||||
('deepseek_v2', 'deepseek_v3', 'deepseek_v32', 'pangu_ultra_moe') \
|
||||
and kv_lora_dim is not None
|
||||
return False
|
||||
|
||||
@ -60,7 +60,7 @@ def patch_pangu():
|
||||
|
||||
@staticmethod
|
||||
def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
|
||||
if hf_config.model_type == "deepseek_v3":
|
||||
if hf_config.model_type in ["deepseek_v3", "deepseek_v32"]:
|
||||
hf_config.model_type = "deepseek_mtp"
|
||||
if hf_config.model_type == "deepseek_mtp":
|
||||
n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
|
||||
@ -105,7 +105,7 @@ def patch_pangu():
|
||||
# mtp acceleration for more models besides deepseek_v3
|
||||
if self.target_model_config and \
|
||||
(self.target_model_config.hf_text_config.model_type \
|
||||
== "deepseek_v3" or
|
||||
in ["deepseek_v3", "deepseek_v32"] or
|
||||
self.target_model_config.hf_text_config.model_type \
|
||||
== "mimo" or
|
||||
self.target_model_config.hf_text_config.model_type \
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
|
||||
from transformers import AutoConfig
|
||||
from omni.adaptors.vllm.patches import model_patch
|
||||
from vllm import ModelRegistry
|
||||
import os
|
||||
@ -10,6 +11,10 @@ if os.getenv("PROFILING_NAMELIST", None):
|
||||
print("<<<Profiler patch environmental variable is enabled, applying profiler patches.")
|
||||
from omni.adaptors.vllm.patches.profiler_patches import apply_profiler_patches
|
||||
|
||||
from omni.models.deepseek.deepseek_v32 import DeepseekV32Config
|
||||
|
||||
AutoConfig.register("deepseek_v32", DeepseekV32Config)
|
||||
|
||||
|
||||
def register_model():
|
||||
is_A2 = os.getenv("ASCEND_PLATFORM", "A3")=="A2"
|
||||
@ -27,6 +32,9 @@ def register_model():
|
||||
ModelRegistry.register_model(
|
||||
"DeepseekV3ForCausalLM",
|
||||
"omni.models.deepseek.deepseek_v3:DeepseekV3ForCausalLM")
|
||||
ModelRegistry.register_model(
|
||||
"DeepseekV32ForCausalLM",
|
||||
"omni.models.deepseek.deepseek_v32:DeepseekV32ForCausalLM")
|
||||
ModelRegistry.register_model(
|
||||
"PanguUltraMoEForCausalLM",
|
||||
"omni.models.pangu.pangu_ultra_moe:PanguUltraMoEForCausalLM")
|
||||
|
40
examples/omniinfer/omni/models/deepseek/deepseek_v32.py
Normal file
40
examples/omniinfer/omni/models/deepseek/deepseek_v32.py
Normal file
@ -0,0 +1,40 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
|
||||
# Adapted from
|
||||
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
|
||||
# Copyright 2023 The vLLM team.
|
||||
# Copyright 2023 DeepSeek-AI and the HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
|
||||
# and OPT implementations in this library. It has been modified from its
|
||||
# original forms to accommodate minor architectural differences compared
|
||||
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from transformers import DeepseekV3Config
|
||||
from vllm.compilation.decorators import support_torch_compile
|
||||
from .deepseek_v3 import DeepseekV3ForCausalLM
|
||||
|
||||
|
||||
class DeepseekV32Config(DeepseekV3Config):
    """Configuration for DeepSeek-V3.2 checkpoints.

    Behaviorally identical to ``DeepseekV3Config``; it exists only so that
    checkpoints tagged ``model_type = "deepseek_v32"`` resolve via
    ``AutoConfig.register("deepseek_v32", DeepseekV32Config)``.
    """

    # Distinct model_type string is the whole point of this subclass —
    # AutoConfig dispatches on it.
    model_type = "deepseek_v32"
    # Cached KV state is runtime-only; keep it out of inference outputs.
    keys_to_ignore_at_inference = ["past_key_values"]

    # NOTE: the original defined __init__(self, **kwargs) that only called
    # super().__init__(**kwargs); that is a no-op override, so it is removed —
    # the inherited constructor is used directly, with identical behavior.
|
||||
|
||||
|
||||
@support_torch_compile
class DeepseekV32ForCausalLM(DeepseekV3ForCausalLM):
    """DeepSeek-V3.2 causal-LM entry point.

    Empty subclass of ``DeepseekV3ForCausalLM``: V3.2 shares the V3
    architecture, so this exists only to register the model under the
    ``DeepseekV32ForCausalLM`` architecture name (see ``register_model``)
    and to opt in to vLLM's ``@support_torch_compile`` decorator.
    """
    pass
|
Reference in New Issue
Block a user