mirror of
https://github.com/vllm-project/vllm-ascend.git
synced 2025-10-20 13:43:53 +08:00
add model_type deepseek_v32 support (#3304)
### What this PR does / why we need it?
Add model_type deepseek_v32 support with proper model registration and AutoConfig support.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Image was built and fully tested with various seq lengths.

Co-authored-by: ken <ken.zhang1@huawei.com>
This commit is contained in:
@ -11,13 +11,13 @@ def patch_pangu():
|
||||
if not hasattr(self.hf_text_config, "model_type"):
|
||||
return False
|
||||
elif self.hf_text_config.model_type in \
|
||||
('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'pangu_ultra_moe'):
|
||||
('deepseek_v2', 'deepseek_v3', 'deepseek_v32', 'deepseek_mtp', 'pangu_ultra_moe'):
|
||||
return kv_lora_dim is not None
|
||||
elif self.hf_text_config.model_type == 'eagle':
|
||||
# if the model is an EAGLE module, check for the
|
||||
# underlying architecture
|
||||
return self.hf_text_config.model.model_type in \
|
||||
('deepseek_v2', 'deepseek_v3', 'pangu_ultra_moe') \
|
||||
('deepseek_v2', 'deepseek_v3', 'deepseek_v32', 'pangu_ultra_moe') \
|
||||
and kv_lora_dim is not None
|
||||
return False
|
||||
|
||||
@ -60,7 +60,7 @@ def patch_pangu():
|
||||
|
||||
@staticmethod
|
||||
def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
|
||||
if hf_config.model_type == "deepseek_v3":
|
||||
if hf_config.model_type in ["deepseek_v3", "deepseek_v32"]:
|
||||
hf_config.model_type = "deepseek_mtp"
|
||||
if hf_config.model_type == "deepseek_mtp":
|
||||
n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
|
||||
@ -105,7 +105,7 @@ def patch_pangu():
|
||||
# mtp acceleration for more models besides deepseek_v3
|
||||
if self.target_model_config and \
|
||||
(self.target_model_config.hf_text_config.model_type \
|
||||
== "deepseek_v3" or
|
||||
in ["deepseek_v3", "deepseek_v32"] or
|
||||
self.target_model_config.hf_text_config.model_type \
|
||||
== "mimo" or
|
||||
self.target_model_config.hf_text_config.model_type \
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
|
||||
from transformers import AutoConfig
|
||||
from omni.adaptors.vllm.patches import model_patch
|
||||
from vllm import ModelRegistry
|
||||
import os
|
||||
@ -10,6 +11,10 @@ if os.getenv("PROFILING_NAMELIST", None):
|
||||
print("<<<Profiler patch environmental variable is enabled, applying profiler patches.")
|
||||
from omni.adaptors.vllm.patches.profiler_patches import apply_profiler_patches
|
||||
|
||||
from omni.models.deepseek.deepseek_v32 import DeepseekV32Config
|
||||
|
||||
AutoConfig.register("deepseek_v32", DeepseekV32Config)
|
||||
|
||||
|
||||
def register_model():
|
||||
is_A2 = os.getenv("ASCEND_PLATFORM", "A3")=="A2"
|
||||
@ -27,6 +32,9 @@ def register_model():
|
||||
ModelRegistry.register_model(
|
||||
"DeepseekV3ForCausalLM",
|
||||
"omni.models.deepseek.deepseek_v3:DeepseekV3ForCausalLM")
|
||||
ModelRegistry.register_model(
|
||||
"DeepseekV32ForCausalLM",
|
||||
"omni.models.deepseek.deepseek_v32:DeepseekV32ForCausalLM")
|
||||
ModelRegistry.register_model(
|
||||
"PanguUltraMoEForCausalLM",
|
||||
"omni.models.pangu.pangu_ultra_moe:PanguUltraMoEForCausalLM")
|
||||
|
40
examples/omniinfer/omni/models/deepseek/deepseek_v32.py
Normal file
40
examples/omniinfer/omni/models/deepseek/deepseek_v32.py
Normal file
@ -0,0 +1,40 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
||||
|
||||
# Adapted from
|
||||
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
|
||||
# Copyright 2023 The vLLM team.
|
||||
# Copyright 2023 DeepSeek-AI and the HuggingFace Inc. team. All rights reserved.
|
||||
#
|
||||
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
|
||||
# and OPT implementations in this library. It has been modified from its
|
||||
# original forms to accommodate minor architectural differences compared
|
||||
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from transformers import DeepseekV3Config
|
||||
from vllm.compilation.decorators import support_torch_compile
|
||||
from .deepseek_v3 import DeepseekV3ForCausalLM
|
||||
|
||||
|
||||
class DeepseekV32Config(DeepseekV3Config):
    """Configuration for DeepSeek-V3.2 checkpoints.

    Behaviorally identical to ``DeepseekV3Config``; it exists only so that
    checkpoints tagged ``model_type = "deepseek_v32"`` resolve via
    ``AutoConfig.register("deepseek_v32", DeepseekV32Config)``.
    """

    # Distinct model_type string is the whole point of this subclass —
    # AutoConfig dispatches on it.
    model_type = "deepseek_v32"
    # Cached KV state is runtime-only; keep it out of inference outputs.
    keys_to_ignore_at_inference = ["past_key_values"]

    # NOTE: the original defined __init__(self, **kwargs) that only called
    # super().__init__(**kwargs); that is a no-op override, so it is removed —
    # the inherited constructor is used directly, with identical behavior.
|
||||
|
||||
|
||||
@support_torch_compile
class DeepseekV32ForCausalLM(DeepseekV3ForCausalLM):
    """DeepSeek-V3.2 causal-LM entry point.

    Empty subclass of ``DeepseekV3ForCausalLM``: V3.2 shares the V3
    architecture, so this exists only to register the model under the
    ``DeepseekV32ForCausalLM`` architecture name (see ``register_model``)
    and to opt in to vLLM's ``@support_torch_compile`` decorator.
    """
    pass
|
Reference in New Issue
Block a user