mirror of https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Build Triton in Docker image (#95233)
We're seeing a bunch of timeout errors when trying to clone and build Triton today (c6d8d10b3e), so let's build Triton as part of the Docker image.
* The pinned commit file is moved into the Docker context at `.ci/docker/ci_commit_pins/triton.txt`, and `.github/ci_commit_pins/triton.txt` is now a symlink pointing to it
* New Docker images are built whenever the pinned commit is updated
* The build logic lives in `.ci/docker/common/install_triton.sh`, which mirrors the `install_triton` step in the CI; the latter can be removed in a separate PR after this one
Pull Request resolved: https://github.com/pytorch/pytorch/pull/95233
Approved by: https://github.com/weiwangmeta, https://github.com/malfet
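At a high level, the image build reads the pinned commit and installs Triton from source with pip. A minimal sketch of that flow (a standalone illustration, not the exact CI invocation; the variable name mirrors the install script in the diff below):

```sh
# Read the pinned Triton commit and build it from source via pip.
TRITON_PINNED_COMMIT=$(cat .ci/docker/ci_commit_pins/triton.txt)
pip install "git+https://github.com/openai/triton@${TRITON_PINNED_COMMIT}#subdirectory=python"
```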
@@ -100,6 +100,7 @@ case "$image" in
     UCX_COMMIT=${_UCX_COMMIT}
     UCC_COMMIT=${_UCC_COMMIT}
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-bionic-cuda11.7-cudnn8-py3-gcc7)
     CUDA_VERSION=11.7.0
@@ -113,6 +114,7 @@ case "$image" in
     UCX_COMMIT=${_UCX_COMMIT}
     UCC_COMMIT=${_UCC_COMMIT}
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-bionic-cuda11.8-cudnn8-py3-gcc7)
     CUDA_VERSION=11.8.0
@@ -126,6 +128,7 @@ case "$image" in
     UCX_COMMIT=${_UCX_COMMIT}
     UCC_COMMIT=${_UCC_COMMIT}
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-focal-py3-clang7-asan)
     ANACONDA_PYTHON_VERSION=3.9
@@ -134,6 +137,7 @@ case "$image" in
     DB=yes
     VISION=yes
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-focal-py3-clang10-onnx)
     ANACONDA_PYTHON_VERSION=3.8
@@ -162,6 +166,7 @@ case "$image" in
     VULKAN_SDK_VERSION=1.2.162.1
     SWIFTSHADER=yes
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-bionic-py3.11-clang9)
     ANACONDA_PYTHON_VERSION=3.11
@@ -172,6 +177,7 @@ case "$image" in
     VULKAN_SDK_VERSION=1.2.162.1
     SWIFTSHADER=yes
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-bionic-py3.8-gcc9)
     ANACONDA_PYTHON_VERSION=3.8
@@ -180,6 +186,7 @@ case "$image" in
     DB=yes
     VISION=yes
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-focal-rocm-n-1-py3)
     ANACONDA_PYTHON_VERSION=3.8
@@ -209,6 +216,7 @@ case "$image" in
     VISION=yes
     KATEX=yes
     CONDA_CMAKE=yes
+    TRITON=yes
     ;;
   pytorch-linux-jammy-cuda11.6-cudnn8-py3.8-clang12)
     ANACONDA_PYTHON_VERSION=3.8
@@ -218,6 +226,7 @@ case "$image" in
     PROTOBUF=yes
     DB=yes
     VISION=yes
+    TRITON=yes
     ;;
   pytorch-linux-jammy-cuda11.7-cudnn8-py3.8-clang12)
     ANACONDA_PYTHON_VERSION=3.8
@@ -227,6 +236,7 @@ case "$image" in
     PROTOBUF=yes
     DB=yes
     VISION=yes
+    TRITON=yes
     ;;
   pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12)
     ANACONDA_PYTHON_VERSION=3.8
@@ -236,6 +246,7 @@ case "$image" in
     PROTOBUF=yes
     DB=yes
     VISION=yes
+    TRITON=yes
     ;;
   pytorch-linux-focal-linter)
     # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
@@ -328,6 +339,7 @@ docker build \
   --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
   --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
   --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
+  --build-arg "TRITON=${TRITON}" \
   -f $(dirname ${DOCKERFILE})/Dockerfile \
   -t "$tmp_tag" \
   "$@" \
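Condensed, each image that opts in sets a TRITON flag which the build script forwards to docker build; a hedged sketch of that flow (DOCKERFILE, tmp_tag, and the trailing build context come from the surrounding script and are shown here only for illustration):

```sh
# Per-image flag -> docker build arg; the Dockerfile decides what to do with it.
TRITON=yes
docker build \
  --build-arg "TRITON=${TRITON}" \
  -f "$(dirname "${DOCKERFILE}")/Dockerfile" \
  -t "$tmp_tag" \
  .
```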
.ci/docker/ci_commit_pins/triton.txt (Normal file, 1 line added)
@@ -0,0 +1 @@
+b8b470bc597c1c5bd03682c09fe3e6b7c53787fd
@@ -13,7 +13,7 @@ as_jenkins() {
   # NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
   # NB: This must be run from a directory that jenkins has access to,
   # works around https://github.com/conda/conda-package-handling/pull/34
-  $SUDO -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
+  $SUDO -E -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
 }
 
 conda_install() {
@@ -30,3 +30,7 @@ conda_run() {
 pip_install() {
   as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*
 }
+
+get_pinned_commit() {
+  cat "${1}".txt
+}
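For context, the new `get_pinned_commit` helper just reads `<name>.txt` from the current directory, which is why the Dockerfile changes later in this diff copy `ci_commit_pins/triton.txt` to `./triton.txt`. A brief usage sketch (the echo is illustrative):

```sh
# Resolve the pinned commit for "triton" by reading ./triton.txt.
TRITON_PINNED_COMMIT=$(get_pinned_commit triton)
echo "Installing Triton at ${TRITON_PINNED_COMMIT}"
```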
.ci/docker/common/install_triton.sh (Executable file, 54 lines added)
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+set -ex
+
+source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
+
+get_conda_version() {
+  as_jenkins conda list -n py_$ANACONDA_PYTHON_VERSION | grep -w $* | head -n 1 | awk '{print $2}'
+}
+
+conda_reinstall() {
+  as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y --force-reinstall $*
+}
+
+# The logic here is copied from .ci/pytorch/common_utils.sh
+TRITON_PINNED_COMMIT=$(get_pinned_commit triton)
+
+apt update
+apt-get install -y gpg-agent
+
+if [ -n "${CONDA_CMAKE}" ]; then
+  # Keep the current cmake and numpy version here, so we can reinstall them later
+  CMAKE_VERSION=$(get_conda_version cmake)
+  NUMPY_VERSION=$(get_conda_version numpy)
+fi
+
+if [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" == "7" ]]; then
+  # Triton needs at least gcc-9 to build
+  apt-get install -y g++-9
+
+  CXX=g++-9 pip_install "git+https://github.com/openai/triton@${TRITON_PINNED_COMMIT}#subdirectory=python"
+elif [ -n "${CLANG_VERSION}" ]; then
+  # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
+  add-apt-repository -y ppa:ubuntu-toolchain-r/test
+  apt-get install -y g++-9
+
+  CXX=g++-9 pip_install "git+https://github.com/openai/triton@${TRITON_PINNED_COMMIT}#subdirectory=python"
+else
+  pip_install "git+https://github.com/openai/triton@${TRITON_PINNED_COMMIT}#subdirectory=python"
+fi
+
+if [ -n "${CONDA_CMAKE}" ]; then
+  # TODO: This is to make sure that the same cmake and numpy version from install conda
+  # script is used. Without this step, the newer cmake version (3.25.2) downloaded by
+  # triton build step via pip will fail to detect conda MKL. Once that issue is fixed,
+  # this can be removed.
+  #
+  # The correct numpy version also needs to be set here because conda claims that it
+  # causes inconsistent environment. Without this, conda will attempt to install the
+  # latest numpy version, which fails ASAN tests with the following import error: Numba
+  # needs NumPy 1.20 or less.
+  conda_reinstall cmake="${CMAKE_VERSION}"
+  conda_reinstall numpy="${NUMPY_VERSION}"
+fi
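Since `pip_install` is a thin wrapper around `conda run ... pip install --progress-bar off` (see the helper above), the gcc-7 and clang branches boil down to roughly the following, ignoring the jenkins sudo wrapper (the conda env name and the commit are placeholders):

```sh
# Roughly equivalent standalone install, forcing g++-9 as the host compiler.
CXX=g++-9 conda run -n py_3.8 pip install --progress-bar off \
  "git+https://github.com/openai/triton@<pinned-commit>#subdirectory=python"
```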
@@ -258,3 +258,8 @@ ghstack==0.7.1
 #Description: ghstack tool
 #Pinned versions: 0.7.1
 #test that import:
+
+jinja2==3.1.2
+#Description: jinja2 template engine
+#Pinned versions: 3.1.2
+#test that import:
@@ -85,6 +85,15 @@ COPY ./common/install_cmake.sh install_cmake.sh
 RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
 RUN rm install_cmake.sh
 
+ARG TRITON
+# Install triton, this needs to be done before sccache because the latter will
+# try to reach out to S3, which docker build runners don't have access
+COPY ./common/install_triton.sh install_triton.sh
+COPY ./common/common_utils.sh common_utils.sh
+COPY ci_commit_pins/triton.txt triton.txt
+RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
+RUN rm install_triton.sh common_utils.sh triton.txt
+
 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
@@ -134,6 +134,15 @@ ENV OPENSSL_ROOT_DIR /opt/openssl
 ENV OPENSSL_DIR /opt/openssl
 RUN rm install_openssl.sh
 
+ARG TRITON
+# Install triton, this needs to be done before sccache because the latter will
+# try to reach out to S3, which docker build runners don't have access
+COPY ./common/install_triton.sh install_triton.sh
+COPY ./common/common_utils.sh common_utils.sh
+COPY ci_commit_pins/triton.txt triton.txt
+RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
+RUN rm install_triton.sh common_utils.sh triton.txt
+
 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
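Both Dockerfiles rely on the same empty-build-arg guard: when an image does not pass `--build-arg TRITON=yes`, `${TRITON}` expands to an empty string and the guarded step is a no-op. In shell terms (a minimal sketch, not part of the diff):

```sh
# With TRITON unset or empty the test is false and the install is skipped;
# with TRITON=yes it runs the install script copied into the build context.
if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
```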
.github/ci_commit_pins/triton.txt (vendored, 1 line removed)
@@ -1 +0,0 @@
-b8b470bc597c1c5bd03682c09fe3e6b7c53787fd
.github/ci_commit_pins/triton.txt (vendored, Symbolic link, 1 line added)
@@ -0,0 +1 @@
+../../.ci/docker/ci_commit_pins/triton.txt
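For reference, one way to recreate that symlink locally from the repository root (a sketch; the commit already contains the link):

```sh
# The link target is relative to the link's own directory (.github/ci_commit_pins).
ln -sf ../../.ci/docker/ci_commit_pins/triton.txt .github/ci_commit_pins/triton.txt
```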
.github/workflows/build-triton-wheel.yml (vendored, 2 lines added)
@@ -9,11 +9,13 @@ on:
       - .github/workflows/build-triton-wheel.yml
       - .github/scripts/build_triton_wheel.py
       - .github/ci_commit_pins/triton.txt
+      - .ci/docker/ci_commit_pins/triton.txt
   pull_request:
     paths:
       - .github/workflows/build-triton-wheel.yml
       - .github/scripts/build_triton_wheel.py
       - .github/ci_commit_pins/triton.txt
+      - .ci/docker/ci_commit_pins/triton.txt
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}