[CI] Update vLLM to v0.11.0 (#3315)

### What this PR does / why we need it? There are 3 steps to upgrade vllm-ascend to the newest vLLM. We'll create 3 PRs: - [x] Upgrade vllm to v0.11.0 to make CI happy first. - [ ] Move deepseek v3.2 to the vllm way - [ ] Then we'll add a new PR to add vllm main support. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-10-20 05:33:51 +08:00 · 2025-10-09 10:41:19 +08:00
parent f12f76d7ba
commit a43e2f61e1
15 changed files with 20 additions and 17 deletions
--- a/.github/workflows/accuracy_test.yaml
+++ b/.github/workflows/accuracy_test.yaml
@ -112,7 +112,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
-          ref: v0.11.0rc3
+          ref: v0.11.0
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source
--- a/.github/workflows/format_pr_body.yaml
+++ b/.github/workflows/format_pr_body.yaml
@ -36,7 +36,7 @@ jobs:

      - name: Get vLLM version
        run: |
-          VLLM_COMMIT=releases/v0.11.0
+          VLLM_COMMIT=v0.11.0
          echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV

      - name: Checkout repository
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@ -51,7 +51,7 @@ jobs:
    strategy:
      matrix:
        include:
-          - vllm_branch: v0.11.0rc3
+          - vllm_branch: v0.11.0
            vllm_ascend_branch: main
            vllm_use_v1: 1
      max-parallel: 1
--- a/.github/workflows/vllm_ascend_dist.yaml
+++ b/.github/workflows/vllm_ascend_dist.yaml
@ -43,7 +43,7 @@ jobs:
    strategy:
      matrix:
        os: [linux-aarch64-a3-8]
-        vllm_version: [v0.11.0rc3]
+        vllm_version: [v0.11.0]
    name: vLLM Ascend test
    runs-on: ${{ matrix.os }}
    container:
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@ -42,7 +42,7 @@ jobs:
  lint:
    uses: ./.github/workflows/pre-commit.yml
    with:
-      vllm: releases/v0.11.0
+      vllm: v0.11.0

  changes:
    runs-on: ubuntu-latest
@ -83,7 +83,7 @@ jobs:
        VLLM_USE_MODELSCOPE: True
    strategy:
      matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
    steps:
      - name: Install packages
        run: |
@ -145,7 +145,7 @@ jobs:
    name: e2e-light
    strategy:
      matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
    # Note (yikun): If CI resource are limited we can split job into two chain jobs
    needs: [lint, changes]
    # only trigger e2e test after lint passed and the change is e2e related with pull request.
--- a/.github/workflows/vllm_ascend_test_310p.yaml
+++ b/.github/workflows/vllm_ascend_test_310p.yaml
@ -53,7 +53,7 @@ jobs:
      max-parallel: 2
      matrix:
        os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
-        vllm_version: [v0.11.0rc3]
+        vllm_version: [v0.11.0]
    name: 310p e2e test
    runs-on: ${{ matrix.os }}
    container:
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@ -68,7 +68,7 @@ jobs:
    name: e2e-full
    strategy:
      matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
    needs: [changes]
    if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
    uses: ./.github/workflows/_e2e_test.yaml
--- a/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml
+++ b/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml
@ -45,7 +45,7 @@ jobs:
  e2e-test:
    uses: ./.github/workflows/_e2e_test.yaml
    with:
-      vllm: releases/v0.11.0
+      vllm: v0.11.0
      runner: linux-aarch64-a2
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
      type: full
--- a/Dockerfile
+++ b/Dockerfile
@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0

 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
--- a/vllm_ascend/models/deepseek_v2.py
+++ b/vllm_ascend/models/deepseek_v2.py
@ -422,7 +422,10 @@ class CustomDeepseekV2SFAAttention(DeepseekV2MLAAttention):

 class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):

-    def __init__(self, vllm_config: VllmConfig, prefix: str) -> None:
+    def __init__(self,
+                 vllm_config: VllmConfig,
+                 prefix: str,
+                 topk_indices_buffer=None) -> None:
        nn.Module.__init__(self)
        config = vllm_config.model_config.hf_config
        model_config = vllm_config.model_config