Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00

Build vLLM aarch64 nightly wheels (#162664)

PyTorch has published its aarch64 nightly wheels for all CUDA versions after https://github.com/pytorch/pytorch/pull/162364, so vLLM aarch64 nightly wheels can now be built on top of them.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162664
Approved by: https://github.com/atalman
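
As a usage note (not part of the commit): with those aarch64 CUDA nightlies published, installing a nightly PyTorch build on an aarch64 host follows the usual nightly-index pattern. The command below is an illustration only and assumes the cu129 nightly index; adjust the CUDA suffix to match your setup.

    # Illustration only: install a PyTorch nightly CUDA build on an aarch64 machine.
    # The index URL follows the standard PyTorch nightly layout; cu129 is an assumed example.
    pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu129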

.github/actionlint.yaml (1 line changed, vendored)

@@ -21,6 +21,7 @@ self-hosted-runner:
   - linux.arm64.2xlarge.ephemeral
   - linux.arm64.m7g.4xlarge
   - linux.arm64.m7g.4xlarge.ephemeral
+  - linux.arm64.r7g.12xlarge.memory
   - linux.4xlarge.nvidia.gpu
   - linux.8xlarge.nvidia.gpu
   - linux.16xlarge.nvidia.gpu

.github/ci_configs/vllm/Dockerfile.tmp_vllm (29 lines changed, vendored)

@@ -82,16 +82,10 @@ RUN if command -v apt-get >/dev/null; then \
         apt-get update -y \
         && apt-get install -y ccache software-properties-common git curl wget sudo vim; \
     else \
-        dnf install -y git curl wget sudo vim; \
+        dnf install -y git curl wget sudo; \
     fi \
     && python3 --version && python3 -m pip --version
 
-# Workaround for https://github.com/openai/triton/issues/2507 and
-# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
-# this won't be needed for future versions of this docker image
-# or future versions of triton.
-RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
-
 # Install uv for faster pip installs if not existed
 RUN --mount=type=cache,target=/root/.cache/uv \
     if ! python3 -m uv --version >/dev/null 2>&1; then \

@@ -220,11 +214,16 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=.git,target=.git \
     if [ "$USE_SCCACHE" = "1" ]; then \
-        echo "Installing sccache..." \
-        && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \
+        echo "Installing sccache..."; \
+        if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+            SCCACHE_ARCHIVE="sccache-v0.8.1-aarch64-unknown-linux-musl"; \
+        else \
+            SCCACHE_ARCHIVE="sccache-v0.8.1-x86_64-unknown-linux-musl"; \
+        fi; \
+        curl -L -o sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v0.8.1/${SCCACHE_ARCHIVE}.tar.gz" \
         && tar -xzf sccache.tar.gz \
-        && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
-        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
+        && sudo mv "${SCCACHE_ARCHIVE}"/sccache /usr/bin/sccache \
+        && rm -rf sccache.tar.gz "${SCCACHE_ARCHIVE}" \
         && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
         && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
         && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
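
The change above keys the sccache archive name off Docker's TARGETPLATFORM build argument so arm64 builds fetch the aarch64 musl binary instead of the x86_64 one. As a rough standalone sketch of the same idea, using uname -m on a plain shell (an illustration, not the Dockerfile's actual logic):

    #!/usr/bin/env bash
    # Hypothetical sketch: pick the matching sccache release archive for the
    # current machine architecture, mirroring the TARGETPLATFORM check above.
    set -euo pipefail
    case "$(uname -m)" in
      aarch64|arm64) SCCACHE_ARCHIVE="sccache-v0.8.1-aarch64-unknown-linux-musl" ;;
      x86_64)        SCCACHE_ARCHIVE="sccache-v0.8.1-x86_64-unknown-linux-musl" ;;
      *) echo "unsupported architecture: $(uname -m)" >&2; exit 1 ;;
    esac
    curl -L -o sccache.tar.gz \
      "https://github.com/mozilla/sccache/releases/download/v0.8.1/${SCCACHE_ARCHIVE}.tar.gz"
    tar -xzf sccache.tar.gz
    mkdir -p "$HOME/.local/bin"
    install -m 0755 "${SCCACHE_ARCHIVE}/sccache" "$HOME/.local/bin/sccache"
    rm -rf sccache.tar.gz "${SCCACHE_ARCHIVE}"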

@@ -285,7 +284,7 @@ RUN if command -v apt-get >/dev/null; then \
         && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
         && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \
     else \
-        dnf install -y git curl wget sudo vim; \
+        dnf install -y git curl wget sudo; \
     fi \
     && python3 --version && python3 -m pip --version
 

@@ -298,12 +297,6 @@ RUN echo "[INFO] Listing current directory before torch install step:" && \
     echo "[INFO] Showing torch_build_versions.txt content:" && \
     cat torch_build_versions.txt
 
-# Workaround for https://github.com/openai/triton/issues/2507 and
-# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
-# this won't be needed for future versions of this docker image
-# or future versions of triton.
-RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
-
 # Install uv for faster pip installs if not existed
 RUN --mount=type=cache,target=/root/.cache/uv \
     if ! python3 -m uv --version > /dev/null 2>&1; then \

.github/scripts/prepare_vllm_wheels.sh (3 lines changed, vendored)

@@ -84,6 +84,9 @@ repackage_wheel() {
   rm -rf $package
 }
 
+# Require to re-package the wheel
+${PYTHON_EXECUTABLE} -mpip install wheel==0.45.1
+
 pushd externals/vllm/wheels
 for package in xformers flashinfer-python vllm; do
   repackage_wheel $package
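
The added lines make sure the wheel package is installed before the loop runs, since the repackage_wheel helper (defined earlier in the script, not shown in this hunk) relies on its command-line tooling. As a generic sketch of the kind of unpack/re-pack round trip that tooling supports, with a made-up wheel file name and not the script's exact logic:

    #!/usr/bin/env bash
    # Illustration only: unpack a wheel, optionally touch its metadata, re-pack it.
    # "example-0.1.0-cp312-cp312-linux_aarch64.whl" is a hypothetical file name.
    set -euo pipefail
    python3 -m pip install wheel==0.45.1
    python3 -m wheel unpack example-0.1.0-cp312-cp312-linux_aarch64.whl -d unpacked/
    # ... edit files under unpacked/example-0.1.0/ here, e.g. dist-info metadata ...
    python3 -m wheel pack unpacked/example-0.1.0 -d repacked/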

.github/workflows/build-vllm-wheel.yml (41 lines changed, vendored)

@@ -12,6 +12,9 @@ on:
     paths:
       - .github/workflows/build-vllm-wheel.yml
       - .github/ci_commit_pins/vllm.txt
+  schedule:
+    # every morning at 01:30PM UTC, 9:30AM EST, 6:30AM PST
+    - cron: 30 13 * * *
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}

@@ -24,21 +27,33 @@ jobs:
       fail-fast: false
       matrix:
         python-version: [ '3.12' ]
-        # TODO (huydhn): Add cu130 https://github.com/pytorch/pytorch/pull/162000#issuecomment-3261541554
+        # TODO (huydhn): Add cu130 after https://github.com/vllm-project/vllm/issues/24464 is resolved
+        platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
         device: [ 'cu128', 'cu129' ]
-        runner: [ 'linux.12xlarge.memory' ]
         include:
-          - device: cu128
+          - platform: manylinux_2_28_x86_64
+            device: cu128
             manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.8'
-          - device: cu129
+            runner: linux.12xlarge.memory
+          - platform: manylinux_2_28_x86_64
+            device: cu129
             manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9'
-    name: "Build ${{ matrix.device }} vLLM wheel"
+            runner: linux.12xlarge.memory
+          - platform: manylinux_2_28_aarch64
+            device: cu128
+            manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.8'
+            runner: linux.arm64.r7g.12xlarge.memory
+          - platform: manylinux_2_28_aarch64
+            device: cu129
+            manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9'
+            runner: linux.arm64.r7g.12xlarge.memory
+    name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}"
     runs-on: ${{ matrix.runner }}
     timeout-minutes: 480
     env:
       PY_VERS: ${{ matrix.python-version }}
       MANYLINUX_IMAGE: ${{ matrix.manylinux-image }}
-      PLATFORM: 'manylinux_2_28_x86_64'
+      PLATFORM: ${{ matrix.platform }}
       BUILD_DEVICE: ${{ matrix.device }}
     steps:
       - name: Setup SSH (Click me for login details)
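
With platform joining device in the matrix, the build job now fans out to four combinations (cu128 and cu129, each on manylinux_2_28_x86_64 and manylinux_2_28_aarch64), and the aarch64 entries are pinned to the arm64 runner. Purely as an illustration of the expansion and of the artifact names produced for the upload step in the next hunk:

    #!/usr/bin/env bash
    # Illustration only: enumerate the four matrix combinations and the artifact
    # name pattern vllm-wheel-<device>-<platform>-<python-version> from the upload step.
    for platform in manylinux_2_28_x86_64 manylinux_2_28_aarch64; do
      for device in cu128 cu129; do
        echo "vllm-wheel-${device}-${platform}-3.12"
      done
    done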

@@ -136,7 +151,7 @@ jobs:
 
       - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
         with:
-          name: vllm-wheel-${{ matrix.device }}-${{ matrix.python-version }}-${{ env.PLATFORM }}
+          name: vllm-wheel-${{ matrix.device }}-${{ matrix.platform }}-${{ matrix.python-version }}
           if-no-files-found: error
           path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl
 

@@ -146,15 +161,17 @@ jobs:
 
   # Copied from build-triton-wheel workflow (mostly)
   upload-wheel:
-    name: "Upload ${{ matrix.device }} vLLM wheel"
+    name: "Upload ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}"
     needs:
       - build-wheel
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
+        platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ]
         device: [ 'cu128', 'cu129' ]
     env:
+      PLATFORM: ${{ matrix.platform }}
       BUILD_DEVICE: ${{ matrix.device }}
     permissions:
       id-token: write

@@ -190,15 +207,15 @@ jobs:
         run: |
           set -eux
           mkdir -p "${RUNNER_TEMP}/artifacts/"
-          mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-*/* "${RUNNER_TEMP}/artifacts/"
+          mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-"${PLATFORM}"-*/* "${RUNNER_TEMP}/artifacts/"
 
-      - name: Set DRY_RUN (only for tagged pushes)
-        if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }}
+      - name: Set DRY_RUN
+        if: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v'))) || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
         shell: bash
         run: |
           echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
 
-      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
+      - name: Set UPLOAD_CHANNEL
         if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
         shell: bash
         run: |
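
The Set DRY_RUN step now also fires for schedule and workflow_dispatch events, so the new nightly cron uploads for real rather than dry-running. For readers unfamiliar with the pattern, appending to "$GITHUB_ENV" is the standard GitHub Actions mechanism for handing an environment variable to later steps in the same job; a minimal illustration (assumed, not taken from this workflow):

    # Illustration only: the GITHUB_ENV hand-off between steps inside one GitHub Actions job.
    # An earlier step writes the variable:
    echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
    # Any later step in the same job then sees it as a normal environment variable:
    echo "DRY_RUN is currently: ${DRY_RUN}"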