From 4b3bd4f39777aaa793dc5877301e9aaeb938b4a5 Mon Sep 17 00:00:00 2001
From: Wang Kunpeng <1289706727@qq.com>
Date: Sun, 19 Oct 2025 11:00:55 +0800
Subject: [PATCH] [main][bugfix] bugfix for minicpm models (#3527)

### What this PR does / why we need it?
Bugfix for minicpm-2b and minicpm3-4b:
- re-enable the `patch_minicpm` worker patch (and its unit test in CI), which had been commented out pending a triton import fix;
- build `attn_state` in `NPUModelRunner` before it is passed to `_make_attention_mask`.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: Wang Kunpeng <1289706727@qq.com>
---
 .github/workflows/vllm_ascend_test.yaml           | 1 -
 vllm_ascend/patch/worker/patch_common/__init__.py | 4 +---
 vllm_ascend/worker/model_runner_v1.py             | 2 ++
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 60f04d316..4aceed58d 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -121,7 +121,6 @@ jobs:
           export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
           pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
             --ignore=tests/ut/test_platform.py \
-            --ignore=tests/ut/patch/worker/patch_common/test_patch_minicpm.py \
             --ignore=tests/ut/core/test_scheduler.py \
             --ignore=tests/ut/kv_connector/test_llmdatadist_connector.py \
             --ignore=tests/ut/kv_connector/test_mooncake_connector.py \
diff --git a/vllm_ascend/patch/worker/patch_common/__init__.py b/vllm_ascend/patch/worker/patch_common/__init__.py
index 2e215b84a..bed7e92e4 100644
--- a/vllm_ascend/patch/worker/patch_common/__init__.py
+++ b/vllm_ascend/patch/worker/patch_common/__init__.py
@@ -26,6 +26,4 @@ import vllm_ascend.patch.worker.patch_common.patch_logits # noqa
 import vllm_ascend.patch.worker.patch_common.patch_roberta # noqa
 import vllm_ascend.patch.worker.patch_common.patch_weight_loader # noqa
 import vllm_ascend.patch.worker.patch_common.patch_multimodal_merge # noqa
-
-# TODO: revert me when triton import is fixed
-# import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa
+import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 946c8ee34..06c52bfd0 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -1346,6 +1346,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
         positions_cpu = self.positions_cpu[:num_input_tokens]
         positions = self.positions[:num_input_tokens]
         seq_lens_cpu = self.seq_lens_cpu[:num_reqs]
+        attn_state = self._build_attn_state(num_reqs, num_scheduled_tokens,
+                                            num_valid_tokens)
         self.attn_mask = self._make_attention_mask(seq_lens=seq_lens_cpu,
                                                    position=positions_cpu,
                                                    attn_state=attn_state)
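
Not part of the patch: a minimal smoke-test sketch for the fixed models, using vLLM's public offline `LLM` API. It assumes vllm and vllm-ascend (with the `patch_minicpm` patch above) are installed on an Ascend host; the Hugging Face model id `openbmb/MiniCPM3-4B` is only an illustrative choice, any MiniCPM checkpoint covered by the fix should behave the same.

```python
# Minimal sketch (assumption: vllm + vllm-ascend installed on an Ascend NPU host).
# Verifies that a MiniCPM model loads and generates after the patch is applied.
from vllm import LLM, SamplingParams

if __name__ == "__main__":
    # Model id is an example; MiniCPM-2B checkpoints are expected to work as well.
    llm = LLM(model="openbmb/MiniCPM3-4B",
              trust_remote_code=True,
              max_model_len=2048)
    params = SamplingParams(temperature=0.0, max_tokens=32)
    outputs = llm.generate(["The capital of France is"], params)
    for out in outputs:
        # Each RequestOutput carries the prompt and its generated completions.
        print(out.prompt, "->", out.outputs[0].text)
```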