From a43e2f61e1ae7984a87113f85304b87b74953dd3 Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Thu, 9 Oct 2025 10:41:19 +0800 Subject: [PATCH] [CI] Update vLLM to v0.11.0 (#3315) ### What this PR does / why we need it? There are 3 step to upgrade vllm-ascend to newest vllm. We'll create 3 PR - [x] Upgrade vllm to v0.11.0 to make CI happy first . - [ ] Move deepseek v3.2 to vllm way - [ ] Then we'll add a new PR to add vllm main support. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0 Signed-off-by: wangxiyuan --- .github/workflows/accuracy_test.yaml | 2 +- .github/workflows/format_pr_body.yaml | 2 +- .github/workflows/nightly_benchmarks.yaml | 2 +- .github/workflows/vllm_ascend_dist.yaml | 2 +- .github/workflows/vllm_ascend_test.yaml | 6 +++--- .github/workflows/vllm_ascend_test_310p.yaml | 2 +- .github/workflows/vllm_ascend_test_full.yaml | 2 +- .github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml | 2 +- Dockerfile | 2 +- Dockerfile.310p | 2 +- Dockerfile.310p.openEuler | 2 +- Dockerfile.a3 | 2 +- Dockerfile.a3.openEuler | 2 +- Dockerfile.openEuler | 2 +- vllm_ascend/models/deepseek_v2.py | 5 ++++- 15 files changed, 20 insertions(+), 17 deletions(-) diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index c1b0e3776..4fbeb9157 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -112,7 +112,7 @@ jobs: uses: actions/checkout@v4 with: repository: vllm-project/vllm - ref: v0.11.0rc3 + ref: v0.11.0 path: ./vllm-empty - name: Install vllm-project/vllm from source diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml index 24c087d61..2faed788c 100644 --- a/.github/workflows/format_pr_body.yaml +++ b/.github/workflows/format_pr_body.yaml @@ -36,7 +36,7 @@ jobs: - name: Get vLLM version run: | - VLLM_COMMIT=releases/v0.11.0 + VLLM_COMMIT=v0.11.0 echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV - name: Checkout repository diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 21543493f..4dff9b68b 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -51,7 +51,7 @@ jobs: strategy: matrix: include: - - vllm_branch: v0.11.0rc3 + - vllm_branch: v0.11.0 vllm_ascend_branch: main vllm_use_v1: 1 max-parallel: 1 diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml index 2a7531849..f5aa1432d 100644 --- a/.github/workflows/vllm_ascend_dist.yaml +++ b/.github/workflows/vllm_ascend_dist.yaml @@ -43,7 +43,7 @@ jobs: strategy: matrix: os: [linux-aarch64-a3-8] - vllm_version: [v0.11.0rc3] + vllm_version: [v0.11.0] name: vLLM Ascend test runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 50527d194..048cd4966 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -42,7 +42,7 @@ jobs: lint: uses: ./.github/workflows/pre-commit.yml with: - vllm: releases/v0.11.0 + vllm: v0.11.0 changes: runs-on: ubuntu-latest @@ -83,7 +83,7 @@ jobs: VLLM_USE_MODELSCOPE: True strategy: matrix: - vllm_version: [releases/v0.11.0, v0.11.0rc3] + vllm_version: [v0.11.0] steps: - name: Install packages run: | @@ -145,7 +145,7 @@ jobs: name: e2e-light strategy: matrix: - vllm_version: [releases/v0.11.0, v0.11.0rc3] + vllm_version: [v0.11.0] # Note (yikun): If CI resource are limited we can split job into two chain jobs needs: [lint, changes] # only trigger e2e test after lint passed and the change is e2e related with pull request. diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml index 7c85f84f9..1de447fc3 100644 --- a/.github/workflows/vllm_ascend_test_310p.yaml +++ b/.github/workflows/vllm_ascend_test_310p.yaml @@ -53,7 +53,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-310p-1, linux-aarch64-310p-4] - vllm_version: [v0.11.0rc3] + vllm_version: [v0.11.0] name: 310p e2e test runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index 1c9f4d123..ec906c461 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -68,7 +68,7 @@ jobs: name: e2e-full strategy: matrix: - vllm_version: [releases/v0.11.0, v0.11.0rc3] + vllm_version: [v0.11.0] needs: [changes] if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} uses: ./.github/workflows/_e2e_test.yaml diff --git a/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml b/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml index f55f8076b..0269fb6f0 100644 --- a/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml +++ b/.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml @@ -45,7 +45,7 @@ jobs: e2e-test: uses: ./.github/workflows/_e2e_test.yaml with: - vllm: releases/v0.11.0 + vllm: v0.11.0 runner: linux-aarch64-a2 image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11 type: full diff --git a/Dockerfile b/Dockerfile index 1d0b73b2a..2fb1c669d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.11.0rc3 +ARG VLLM_TAG=v0.11.0 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p b/Dockerfile.310p index f5ec94f2e..b1adc1a9c 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.11.0rc3 +ARG VLLM_TAG=v0.11.0 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index 3e9a2dab5..eeac1b336 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.11.0rc3 +ARG VLLM_TAG=v0.11.0 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/Dockerfile.a3 b/Dockerfile.a3 index de0169805..be2e797f0 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.11.0rc3 +ARG VLLM_TAG=v0.11.0 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index cec4ab63e..268aec238 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.11.0rc3 +ARG VLLM_TAG=v0.11.0 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 14b6cce61..17d046b2e 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.11.0rc3 +ARG VLLM_TAG=v0.11.0 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/vllm_ascend/models/deepseek_v2.py b/vllm_ascend/models/deepseek_v2.py index 2333c3814..60fc89ddf 100644 --- a/vllm_ascend/models/deepseek_v2.py +++ b/vllm_ascend/models/deepseek_v2.py @@ -422,7 +422,10 @@ class CustomDeepseekV2SFAAttention(DeepseekV2MLAAttention): class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer): - def __init__(self, vllm_config: VllmConfig, prefix: str) -> None: + def __init__(self, + vllm_config: VllmConfig, + prefix: str, + topk_indices_buffer=None) -> None: nn.Module.__init__(self) config = vllm_config.model_config.hf_config model_config = vllm_config.model_config