From c2bd41ac9f64cd873afa8a061f14192adaadbf7e Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 16 Oct 2025 01:03:26 +0000 Subject: [PATCH] Build vLLM nightly wheels for CUDA 13.0 (#163239) Now that https://github.com/vllm-project/vllm/pull/24599 has been merged Pull Request resolved: https://github.com/pytorch/pytorch/pull/163239 Approved by: https://github.com/malfet, https://github.com/atalman --- .../build-external-packages/action.yml | 2 +- .github/workflows/build-vllm-wheel.yml | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.github/actions/build-external-packages/action.yml b/.github/actions/build-external-packages/action.yml index c0c727d93ac6..049c3ce28e45 100644 --- a/.github/actions/build-external-packages/action.yml +++ b/.github/actions/build-external-packages/action.yml @@ -65,7 +65,7 @@ runs: cd .ci/lumen_cli python3 -m pip install -e . ) - MAX_JOBS="$(nproc --ignore=6)" + MAX_JOBS="$(nproc --ignore=10)" export MAX_JOBS # Split the comma-separated list and build each target diff --git a/.github/workflows/build-vllm-wheel.yml b/.github/workflows/build-vllm-wheel.yml index 2c6635374841..4526faf6d7fc 100644 --- a/.github/workflows/build-vllm-wheel.yml +++ b/.github/workflows/build-vllm-wheel.yml @@ -27,9 +27,8 @@ jobs: fail-fast: false matrix: python-version: [ '3.12' ] - # TODO (huydhn): Add cu130 after https://github.com/vllm-project/vllm/issues/24464 is resolved platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] - device: [ 'cu128', 'cu129' ] + device: [ 'cu128', 'cu129', 'cu130' ] include: - platform: manylinux_2_28_x86_64 device: cu128 @@ -39,6 +38,10 @@ jobs: device: cu129 manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9' runner: linux.12xlarge.memory + - platform: manylinux_2_28_x86_64 + device: cu130 + manylinux-image: 'pytorch/manylinux2_28-builder:cuda13.0' + runner: linux.12xlarge.memory - platform: manylinux_2_28_aarch64 device: cu128 manylinux-image: 
'pytorch/manylinuxaarch64-builder:cuda12.8' @@ -47,6 +50,11 @@ jobs: device: cu129 manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9' runner: linux.arm64.r7g.12xlarge.memory + exclude: + # TODO (huydhn): Add cu130 aarch64 once PyTorch is on 2.9+ and + # xformers is updated to support 13.0 + - platform: manylinux_2_28_aarch64 + device: cu130 name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}" runs-on: ${{ matrix.runner }} timeout-minutes: 480 @@ -169,7 +177,12 @@ jobs: fail-fast: false matrix: platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] - device: [ 'cu128', 'cu129' ] + device: [ 'cu128', 'cu129', 'cu130' ] + exclude: + # TODO (huydhn): Add cu130 aarch64 once PyTorch is on 2.9+ and + # xformers is updated to support 13.0 + - platform: manylinux_2_28_aarch64 + device: cu130 env: PLATFORM: ${{ matrix.platform }} BUILD_DEVICE: ${{ matrix.device }}