From a43e2f61e1ae7984a87113f85304b87b74953dd3 Mon Sep 17 00:00:00 2001
From: wangxiyuan <wangxiyuan1007@gmail.com>
Date: Thu, 9 Oct 2025 10:41:19 +0800
Subject: [PATCH] [CI] Update vLLM to v0.11.0 (#3315)

### What this PR does / why we need it?
There are 3 steps to upgrade vllm-ascend to the newest vLLM. We'll create 3
PRs:

- [x] Upgrade vLLM to v0.11.0 to make CI happy first.
- [ ] Move DeepSeek v3.2 to the vLLM way
- [ ] Then we'll add a new PR to add vllm main support.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
---
 .github/workflows/accuracy_test.yaml                     | 2 +-
 .github/workflows/format_pr_body.yaml                    | 2 +-
 .github/workflows/nightly_benchmarks.yaml                | 2 +-
 .github/workflows/vllm_ascend_dist.yaml                  | 2 +-
 .github/workflows/vllm_ascend_test.yaml                  | 6 +++---
 .github/workflows/vllm_ascend_test_310p.yaml             | 2 +-
 .github/workflows/vllm_ascend_test_full.yaml             | 2 +-
 .github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml | 2 +-
 Dockerfile                                               | 2 +-
 Dockerfile.310p                                          | 2 +-
 Dockerfile.310p.openEuler                                | 2 +-
 Dockerfile.a3                                            | 2 +-
 Dockerfile.a3.openEuler                                  | 2 +-
 Dockerfile.openEuler                                     | 2 +-
 vllm_ascend/models/deepseek_v2.py                        | 5 ++++-
 15 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
index c1b0e3776..4fbeb9157 100644
--- a/.github/workflows/accuracy_test.yaml
+++ b/.github/workflows/accuracy_test.yaml
@@ -112,7 +112,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
-          ref: v0.11.0rc3
+          ref: v0.11.0
           path: ./vllm-empty
 
       - name: Install vllm-project/vllm from source
diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml
index 24c087d61..2faed788c 100644
--- a/.github/workflows/format_pr_body.yaml
+++ b/.github/workflows/format_pr_body.yaml
@@ -36,7 +36,7 @@ jobs:
 
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=releases/v0.11.0
+          VLLM_COMMIT=v0.11.0
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
 
       - name: Checkout repository
diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
index 21543493f..4dff9b68b 100644
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@@ -51,7 +51,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.11.0rc3
+          - vllm_branch: v0.11.0
             vllm_ascend_branch: main
             vllm_use_v1: 1
       max-parallel: 1
diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml
index 2a7531849..f5aa1432d 100644
--- a/.github/workflows/vllm_ascend_dist.yaml
+++ b/.github/workflows/vllm_ascend_dist.yaml
@@ -43,7 +43,7 @@ jobs:
     strategy:
       matrix:
         os: [linux-aarch64-a3-8]
-        vllm_version: [v0.11.0rc3]
+        vllm_version: [v0.11.0]
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 50527d194..048cd4966 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: releases/v0.11.0
+      vllm: v0.11.0
 
   changes:
     runs-on: ubuntu-latest
@@ -83,7 +83,7 @@ jobs:
         VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
     steps:
       - name: Install packages
         run: |
@@ -145,7 +145,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.
diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml
index 7c85f84f9..1de447fc3 100644
--- a/.github/workflows/vllm_ascend_test_310p.yaml
+++ b/.github/workflows/vllm_ascend_test_310p.yaml
@@ -53,7 +53,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
-        vllm_version: [v0.11.0rc3]
+        vllm_version: [v0.11.0]
     name: 310p e2e test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
index 1c9f4d123..ec906c461 100644
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -68,7 +68,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml
diff --git a/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml b/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml
index f55f8076b..0269fb6f0 100644
--- a/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml
+++ b/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml
@@ -45,7 +45,7 @@ jobs:
   e2e-test:
     uses: ./.github/workflows/_e2e_test.yaml
     with:
-      vllm: releases/v0.11.0
+      vllm: v0.11.0
       runner: linux-aarch64-a2
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
       type: full
diff --git a/Dockerfile b/Dockerfile
index 1d0b73b2a..2fb1c669d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p b/Dockerfile.310p
index f5ec94f2e..b1adc1a9c 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index 3e9a2dab5..eeac1b336 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index de0169805..be2e797f0 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index cec4ab63e..268aec238 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index 14b6cce61..17d046b2e 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/vllm_ascend/models/deepseek_v2.py b/vllm_ascend/models/deepseek_v2.py
index 2333c3814..60fc89ddf 100644
--- a/vllm_ascend/models/deepseek_v2.py
+++ b/vllm_ascend/models/deepseek_v2.py
@@ -422,7 +422,10 @@ class CustomDeepseekV2SFAAttention(DeepseekV2MLAAttention):
 
 class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
 
-    def __init__(self, vllm_config: VllmConfig, prefix: str) -> None:
+    def __init__(self,
+                 vllm_config: VllmConfig,
+                 prefix: str,
+                 topk_indices_buffer=None) -> None:
         nn.Module.__init__(self)
         config = vllm_config.model_config.hf_config
         model_config = vllm_config.model_config