Files
pytorch/.github/workflows/build-triton-wheel.yml
Benjamin Glass cbc53b7696 Update pybind11 submodule to 3.0.1 (#160754)
Upgrade to PyBind11 v3. This allows us to strip out our own (possibly broken?) handling of the C++ ABI when building extensions, in favor of the more-complete PyBind11 internal handling.

Fixes a few test failures due to https://github.com/pybind/pybind11/issues/5774, which effectively makes the `__qualname__` attribute of functions platform-dependent.

Test plan: CI

Pull Request resolved: https://github.com/pytorch/pytorch/pull/160754
Approved by: https://github.com/Skylion007
2025-08-27 21:15:01 +00:00

344 lines
14 KiB
YAML

name: Build Triton wheels
on:
push:
branches:
- main
tags:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
- 'ciflow/triton_binaries/*'
paths:
- .github/workflows/build-triton-wheel.yml
- .github/scripts/build_triton_wheel.py
- .github/ci_commit_pins/triton.txt
- .github/scripts/windows/install_vs2022.ps1
- .github/scripts/windows/build_triton.bat
- .ci/docker/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton-xpu.txt
workflow_dispatch:
pull_request:
paths:
- .github/workflows/build-triton-wheel.yml
- .github/scripts/build_triton_wheel.py
- .github/ci_commit_pins/triton.txt
- .github/scripts/windows/install_vs2022.ps1
- .github/scripts/windows/build_triton.bat
- .ci/docker/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton-xpu.txt
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
build-wheel:
name: "Build Triton Wheel"
needs: get-label-type
runs-on: ${{ matrix.runs_on }}
strategy:
fail-fast: false
matrix:
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
device: ["cuda", "rocm", "xpu", "aarch64"]
docker-image: ["pytorch/manylinux2_28-builder:cpu"]
include:
- device: "rocm"
rocm_version: "6.4"
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
- device: "cuda"
rocm_version: ""
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
- device: "xpu"
rocm_version: ""
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
- device: "aarch64"
rocm_version: ""
runs_on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge"
timeout-minutes: 40
env:
DOCKER_IMAGE: ${{ matrix.device == 'rocm' && format('pytorch/manylinux2_28-builder:rocm{0}', matrix.rocm_version) || matrix.device == 'aarch64' && 'pytorch/manylinux2_28_aarch64-builder:cpu-aarch64' || matrix.docker-image }}
PY_VERS: ${{ matrix.py_vers }}
BUILD_DEVICE: ${{ matrix.device }}
PLATFORM: 'manylinux_2_28_x86_64'
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
submodules: false
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ env.DOCKER_IMAGE }}
- name: Build Triton wheel
env:
IS_RELEASE_TAG: ${{ startsWith(github.event.ref, 'refs/tags/v') }}
run: |
set -x
mkdir -p "${RUNNER_TEMP}/artifacts/"
container_name=$(docker run \
--tty \
--detach \
-v "${GITHUB_WORKSPACE}:/pytorch" \
-v "${RUNNER_TEMP}/artifacts:/artifacts" \
-w /artifacts/ \
"${DOCKER_IMAGE}" \
)
# Determine python executable for given version
case $PY_VERS in
3.9)
PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python
;;
3.10)
PYTHON_EXECUTABLE=/opt/python/cp310-cp310/bin/python
;;
3.11)
PYTHON_EXECUTABLE=/opt/python/cp311-cp311/bin/python
;;
3.12)
PYTHON_EXECUTABLE=/opt/python/cp312-cp312/bin/python
;;
3.13)
PYTHON_EXECUTABLE=/opt/python/cp313-cp313/bin/python
;;
3.13t)
PYTHON_EXECUTABLE=/opt/python/cp313-cp313t/bin/python
;;
3.14)
PYTHON_EXECUTABLE=/opt/python/cp314-cp314/bin/python
;;
3.14t)
PYTHON_EXECUTABLE=/opt/python/cp314-cp314t/bin/python
;;
*)
echo "Unsupported python version ${PY_VERS}"
exit 1
;;
esac
RELEASE=""
WITH_CLANG_LDD=""
if [[ "${IS_RELEASE_TAG}" == true ]]; then
RELEASE="--release"
fi
docker exec -t "${container_name}" yum install -y zlib-devel zip
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -m pip install -U setuptools==78.1.0 pybind11==3.0.1 auditwheel wheel
set +e
docker exec -t "${container_name}" command -v pip
has_pip=$?
set -e
if [ $has_pip -eq 0 ] ; then
docker exec -t "${container_name}" pip install -U cmake --force-reinstall
else
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -m pip install -U cmake --force-reinstall
fi
if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "rocm" || "${{ matrix.device }}" == "aarch64" ) ]]; then
# With this install, it gets clang 16.0.6.
docker exec -t "${container_name}" dnf install clang lld -y
WITH_CLANG_LDD="--with-clang-ldd"
fi
if [[ "${BUILD_DEVICE}" == xpu ]]; then
docker exec -t "${container_name}" bash -c "dnf install -y gcc-toolset-13-gcc-c++"
docker exec -t "${container_name}" bash -c "source /opt/rh/gcc-toolset-13/enable && ${PYTHON_EXECUTABLE} /pytorch/.github/scripts/build_triton_wheel.py --device=$BUILD_DEVICE $RELEASE"
else
docker exec -t "${container_name}" bash -c "${PYTHON_EXECUTABLE} /pytorch/.github/scripts/build_triton_wheel.py --device=$BUILD_DEVICE $RELEASE $WITH_CLANG_LDD"
fi
if [[ ("${{ matrix.device }}" == "cuda" || "${{ matrix.device }}" == "xpu") ]]; then
docker exec -t "${container_name}" bash -c "auditwheel repair --plat ${PLATFORM} //artifacts/*.whl"
else
docker exec -t "${container_name}" bash -c "mkdir //artifacts/wheelhouse"
docker exec -t "${container_name}" bash -c "mv //artifacts/*.whl //artifacts/wheelhouse/"
fi
docker exec -t "${container_name}" chown -R 1000.1000 /artifacts/wheelhouse
- uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
with:
name: pytorch-triton-wheel-${{ matrix.py_vers }}-${{ matrix.device }}-${{ env.PLATFORM }}
if-no-files-found: error
path: ${{ runner.temp }}/artifacts/wheelhouse/*
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always()
build-wheel-win:
name: "Build Triton Windows Wheel"
needs: get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
strategy:
fail-fast: false
matrix:
py_vers: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t" ]
device: ["xpu"]
timeout-minutes: 40
env:
PY_VERS: ${{ matrix.py_vers }}
BUILD_DEVICE: ${{ matrix.device }}
VC_INSTALL_PATH: "C:\\MSVC-BuildTools-2022"
steps:
- name: Display EC2 information
shell: bash
run: |
set -euo pipefail
function get_ec2_metadata() {
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
echo "instance-type: $(get_ec2_metadata instance-type)"
echo "system info $(uname -a)"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Enable git long paths and symlinks on Windows and disable fsmonitor daemon
shell: bash
run: |
git config --global core.longpaths true
git config --global core.symlinks true
# https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
# the directory on Windows and prevent GHA from checking out as reported
# in https://github.com/actions/checkout/issues/1018
git config --global core.fsmonitor false
- name: Checkout PyTorch
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: false
path: pytorch
show-progress: false
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: Enable long paths on Windows and install VS2022 17.13.2
env:
VC_YEAR: 2022
VC_VERSION: 17.13.2
shell: bash
working-directory: pytorch
run: |
powershell .github/scripts/windows/install_vs2022.ps1
- name: Build Triton wheel
env:
IS_RELEASE_TAG: ${{ startsWith(github.event.ref, 'refs/tags/v') }}
working-directory: pytorch
shell: bash
run: |
set -x
export RELEASE=""
if [[ "${IS_RELEASE_TAG}" == true ]]; then
export RELEASE="--release"
fi
.github/scripts/windows/build_triton.bat
mkdir -p "${RUNNER_TEMP}/artifacts/"
mv ./*.whl "${RUNNER_TEMP}/artifacts/"
- uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
with:
name: pytorch-triton-wheel-${{ matrix.py_vers }}-${{ matrix.device }}
if-no-files-found: error
path: ${{ runner.temp }}/artifacts/*
upload-wheel:
runs-on: ubuntu-22.04
needs:
- build-wheel
- build-wheel-win
permissions:
id-token: write
contents: read
container:
image: continuumio/miniconda3:4.12.0
environment: ${{ (github.event_name == 'push' && github.event.ref == 'refs/heads/main') && 'nightly-wheel-upload' || '' }}
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Configure AWS credentials(PyTorch account) for main
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' }}
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
aws-region: us-east-1
- name: Configure AWS credentials(PyTorch account) for RC builds
if: ${{ github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/')) }}
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
aws-region: us-east-1
- name: Download Build Artifacts
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
with:
# Download all available artifacts
path: ${{ runner.temp }}/artifacts-all
- name: Select Wheel Artifacts
shell: bash
run: |
set -x
mkdir -p "${RUNNER_TEMP}/artifacts/"
mv "${RUNNER_TEMP}"/artifacts-all/pytorch-triton-wheel-*/* "${RUNNER_TEMP}/artifacts/"
- name: Set DRY_RUN (only for tagged pushes)
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }}
shell: bash
run: |
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
- name: Set UPLOAD_CHANNEL (only for tagged pushes)
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
shell: bash
run: |
set -ex
# reference ends with an RC suffix
if [[ "${GITHUB_REF_NAME}" = *-rc[0-9]* ]]; then
echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
fi
# NB: This step is gated by DRY_RUN, which is enabled everywhere except main and release branches
- name: Upload binaries
env:
PACKAGE_TYPE: wheel
# The UPLOAD_SUBFOLDER needs to be empty here so that triton wheels are uploaded
# to nightly or test
UPLOAD_SUBFOLDER: ""
PKG_DIR: ${{ runner.temp }}/artifacts
shell: bash
run: |
set -ex
bash .circleci/scripts/binary_upload.sh