Build vLLM aarch64 nightly wheels (#162664)

PyTorch has published its aarch64 nightly wheels for all CUDA version after https://github.com/pytorch/pytorch/pull/162364
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162664
Approved by: https://github.com/atalman
This commit is contained in:
Huy Do
2025-09-13 03:43:55 +00:00
committed by PyTorch MergeBot
parent 543d50db2b
commit 66133b1ab7
4 changed files with 44 additions and 30 deletions

View File

@ -21,6 +21,7 @@ self-hosted-runner:
- linux.arm64.2xlarge.ephemeral
- linux.arm64.m7g.4xlarge
- linux.arm64.m7g.4xlarge.ephemeral
- linux.arm64.r7g.12xlarge.memory
- linux.4xlarge.nvidia.gpu
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu

View File

@ -82,16 +82,10 @@ RUN if command -v apt-get >/dev/null; then \
apt-get update -y \
&& apt-get install -y ccache software-properties-common git curl wget sudo vim; \
else \
dnf install -y git curl wget sudo vim; \
dnf install -y git curl wget sudo; \
fi \
&& python3 --version && python3 -m pip --version
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# Install uv for faster pip installs if not existed
RUN --mount=type=cache,target=/root/.cache/uv \
if ! python3 -m uv --version >/dev/null 2>&1; then \
@ -220,11 +214,16 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=.git,target=.git \
if [ "$USE_SCCACHE" = "1" ]; then \
echo "Installing sccache..." \
&& curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
echo "Installing sccache..."; \
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
SCCACHE_ARCHIVE="sccache-v0.8.1-aarch64-unknown-linux-musl"; \
else \
SCCACHE_ARCHIVE="sccache-v0.8.1-x86_64-unknown-linux-musl"; \
fi; \
curl -L -o sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v0.8.1/${SCCACHE_ARCHIVE}.tar.gz" \
&& tar -xzf sccache.tar.gz \
&& sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
&& rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
&& sudo mv "${SCCACHE_ARCHIVE}"/sccache /usr/bin/sccache \
&& rm -rf sccache.tar.gz "${SCCACHE_ARCHIVE}" \
&& export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
&& export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
&& export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
@ -285,7 +284,7 @@ RUN if command -v apt-get >/dev/null; then \
&& ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
&& curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \
else \
dnf install -y git curl wget sudo vim; \
dnf install -y git curl wget sudo; \
fi \
&& python3 --version && python3 -m pip --version
@ -298,12 +297,6 @@ RUN echo "[INFO] Listing current directory before torch install step:" && \
echo "[INFO] Showing torch_build_versions.txt content:" && \
cat torch_build_versions.txt
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# Install uv for faster pip installs if not existed
RUN --mount=type=cache,target=/root/.cache/uv \
if ! python3 -m uv --version > /dev/null 2>&1; then \

View File

@ -84,6 +84,9 @@ repackage_wheel() {
rm -rf $package
}
# Require to re-package the wheel
${PYTHON_EXECUTABLE} -mpip install wheel==0.45.1
pushd externals/vllm/wheels
for package in xformers flashinfer-python vllm; do
repackage_wheel $package

View File

@ -12,6 +12,9 @@ on:
paths:
- .github/workflows/build-vllm-wheel.yml
- .github/ci_commit_pins/vllm.txt
schedule:
# every morning at 01:30PM UTC, 9:30AM EST, 6:30AM PST
- cron: 30 13 * * *
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@ -24,21 +27,33 @@ jobs:
fail-fast: false
matrix:
python-version: [ '3.12' ]
# TODO (huydhn): Add cu130 https://github.com/pytorch/pytorch/pull/162000#issuecomment-3261541554
# TODO (huydhn): Add cu130 after https://github.com/vllm-project/vllm/issues/24464 is resolved
platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
device: [ 'cu128', 'cu129' ]
runner: [ 'linux.12xlarge.memory' ]
include:
- device: cu128
- platform: manylinux_2_28_x86_64
device: cu128
manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.8'
- device: cu129
runner: linux.12xlarge.memory
- platform: manylinux_2_28_x86_64
device: cu129
manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9'
name: "Build ${{ matrix.device }} vLLM wheel"
runner: linux.12xlarge.memory
- platform: manylinux_2_28_aarch64
device: cu128
manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.8'
runner: linux.arm64.r7g.12xlarge.memory
- platform: manylinux_2_28_aarch64
device: cu129
manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9'
runner: linux.arm64.r7g.12xlarge.memory
name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}"
runs-on: ${{ matrix.runner }}
timeout-minutes: 480
env:
PY_VERS: ${{ matrix.python-version }}
MANYLINUX_IMAGE: ${{ matrix.manylinux-image }}
PLATFORM: 'manylinux_2_28_x86_64'
PLATFORM: ${{ matrix.platform }}
BUILD_DEVICE: ${{ matrix.device }}
steps:
- name: Setup SSH (Click me for login details)
@ -136,7 +151,7 @@ jobs:
- uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
with:
name: vllm-wheel-${{ matrix.device }}-${{ matrix.python-version }}-${{ env.PLATFORM }}
name: vllm-wheel-${{ matrix.device }}-${{ matrix.platform }}-${{ matrix.python-version }}
if-no-files-found: error
path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl
@ -146,15 +161,17 @@ jobs:
# Copied from build-triton-wheel workflow (mostly)
upload-wheel:
name: "Upload ${{ matrix.device }} vLLM wheel"
name: "Upload ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}"
needs:
- build-wheel
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
device: [ 'cu128', 'cu129' ]
env:
PLATFORM: ${{ matrix.platform }}
BUILD_DEVICE: ${{ matrix.device }}
permissions:
id-token: write
@ -190,15 +207,15 @@ jobs:
run: |
set -eux
mkdir -p "${RUNNER_TEMP}/artifacts/"
mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-*/* "${RUNNER_TEMP}/artifacts/"
mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-"${PLATFORM}"-*/* "${RUNNER_TEMP}/artifacts/"
- name: Set DRY_RUN (only for tagged pushes)
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }}
- name: Set DRY_RUN
if: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v'))) || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
shell: bash
run: |
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
- name: Set UPLOAD_CHANNEL (only for tagged pushes)
- name: Set UPLOAD_CHANNEL
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
shell: bash
run: |