[CI] Update vLLM to v0.11.0 (#3315)

### What this PR does / why we need it?
There are 3 steps to upgrade vllm-ascend to the newest vLLM; we'll create 3 PRs:

- [x] Upgrade vLLM to v0.11.0 first to make CI happy.
- [ ] Move DeepSeek V3.2 over to the upstream vLLM implementation.
- [ ] Add a follow-up PR to support the vLLM main branch again.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0
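
As an extra local sanity check (not part of this change; a minimal sketch assuming the standard `vllm.__version__` attribute and pip metadata):

```bash
# Hedged sketch: confirm the locally installed vLLM matches the pinned release.
python3 -c "import vllm; print(vllm.__version__)"   # expected to print 0.11.0
pip show vllm | grep -i '^version'                  # cross-check via pip metadata
```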

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Commit a43e2f61e1 (parent f12f76d7ba), authored by wangxiyuan on 2025-10-09 10:41:19 +08:00, committed by GitHub.
15 changed files with 20 additions and 17 deletions.

View File

@@ -112,7 +112,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
-          ref: v0.11.0rc3
+          ref: v0.11.0
           path: ./vllm-empty
       - name: Install vllm-project/vllm from source
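
For context, the checkout above is followed by an install-from-source step; a rough shell equivalent (a sketch that mirrors the `git clone` and `VLLM_TARGET_DEVICE="empty"` install commands in the Dockerfiles later in this commit, reusing the `./vllm-empty` path from the hunk above) looks like:

```bash
# Hedged sketch of the checkout + "Install vllm-project/vllm from source" pattern
# that these workflows now pin to v0.11.0.
git clone --depth 1 https://github.com/vllm-project/vllm.git --branch v0.11.0 ./vllm-empty
# The "empty" device target builds vLLM without a bundled device backend,
# leaving the hardware support to the vllm-ascend plugin.
VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e ./vllm-empty/ \
    --extra-index https://download.pytorch.org/whl/cpu/
```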

View File

@@ -36,7 +36,7 @@ jobs:
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=releases/v0.11.0
+          VLLM_COMMIT=v0.11.0
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
       - name: Checkout repository
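
The step above relies on the standard `$GITHUB_ENV` mechanism: any `KEY=VALUE` line appended to that file becomes an environment variable for the remaining steps of the job. Roughly:

```bash
# Hedged sketch: how the "Get vLLM version" step propagates the resolved link.
VLLM_COMMIT=v0.11.0
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> "$GITHUB_ENV"
# Subsequent steps then see $VLLM_COMMIT set to
# https://github.com/vllm-project/vllm/commit/v0.11.0
```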

View File

@@ -51,7 +51,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.11.0rc3
+          - vllm_branch: v0.11.0
             vllm_ascend_branch: main
             vllm_use_v1: 1
       max-parallel: 1

View File

@@ -43,7 +43,7 @@ jobs:
     strategy:
       matrix:
         os: [linux-aarch64-a3-8]
-        vllm_version: [v0.11.0rc3]
+        vllm_version: [v0.11.0]
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:

View File

@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: releases/v0.11.0
+      vllm: v0.11.0
   changes:
     runs-on: ubuntu-latest
@@ -83,7 +83,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
     steps:
       - name: Install packages
         run: |
@@ -145,7 +145,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.

View File

@@ -53,7 +53,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
-        vllm_version: [v0.11.0rc3]
+        vllm_version: [v0.11.0]
     name: 310p e2e test
     runs-on: ${{ matrix.os }}
     container:

View File

@@ -68,7 +68,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [releases/v0.11.0, v0.11.0rc3]
+        vllm_version: [v0.11.0]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml

View File

@@ -45,7 +45,7 @@ jobs:
   e2e-test:
     uses: ./.github/workflows/_e2e_test.yaml
     with:
-      vllm: releases/v0.11.0
+      vllm: v0.11.0
       runner: linux-aarch64-a2
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
       type: full

View File

@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

View File

@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

View File

@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.

View File

@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

View File

@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.

View File

@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.11.0rc3
+ARG VLLM_TAG=v0.11.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
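
Since `VLLM_TAG` is only a build-argument default, images can still be built against another vLLM ref without editing the Dockerfile; a hedged usage sketch (the image tag and Dockerfile path below are placeholders, not names from this repo):

```bash
# Hedged usage sketch: override the default VLLM_TAG at build time.
# "vllm-ascend:local" and the Dockerfile path are hypothetical placeholders.
docker build --build-arg VLLM_TAG=v0.11.0 -t vllm-ascend:local -f Dockerfile .
```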

View File

@@ -422,7 +422,10 @@ class CustomDeepseekV2SFAAttention(DeepseekV2MLAAttention):
 class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
-    def __init__(self, vllm_config: VllmConfig, prefix: str) -> None:
+    def __init__(self,
+                 vllm_config: VllmConfig,
+                 prefix: str,
+                 topk_indices_buffer=None) -> None:
         nn.Module.__init__(self)
         config = vllm_config.model_config.hf_config
         model_config = vllm_config.model_config