Compare commits

..

1 Commits

Author SHA1 Message Date
1a8934af84 [dynamo] Graph break on random_ op
Fixes https://github.com/pytorch/pytorch/issues/121621

ghstack-source-id: 098b44305ae2aaab334e6973f6e94f937e61f9a0
Pull Request resolved: https://github.com/pytorch/pytorch/pull/130222
2024-07-07 15:29:48 -07:00
2691 changed files with 41076 additions and 75049 deletions

View File

@ -1,4 +1,4 @@
# Docker images for GitHub CI and CD
# Docker images for GitHub CI
This directory contains everything needed to build the Docker images
that are used in our CI.
@ -12,7 +12,7 @@ each image as the `BUILD_ENVIRONMENT` environment variable.
See `build.sh` for valid build environments (it's the giant switch).
## Docker CI builds
## Contents
* `build.sh` -- dispatch script to launch all builds
* `common` -- scripts used to execute individual Docker build stages
@ -21,12 +21,6 @@ See `build.sh` for valid build environments (it's the giant switch).
* `ubuntu-rocm` -- Dockerfile for Ubuntu image with ROCm support
* `ubuntu-xpu` -- Dockerfile for Ubuntu image with XPU support
### Docker CD builds
* `conda` - Dockerfile and build.sh to build Docker images used in nightly conda builds
* `manywheel` - Dockerfile and build.sh to build Docker images used in nightly manywheel builds
* `libtorch` - Dockerfile and build.sh to build Docker images used in nightly libtorch builds
## Usage
```bash

View File

@ -407,22 +407,6 @@ case "$image" in
# from pytorch/llvm:9.0.1 is x86 specific
SKIP_LLVM_SRC_BUILD_INSTALL=yes
;;
pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks)
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
ACL=yes
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping sccache due to the following issue
# https://github.com/pytorch/pytorch/issues/121559
SKIP_SCCACHE_INSTALL=yes
# snadampal: skipping llvm src build install because the current version
# from pytorch/llvm:9.0.1 is x86 specific
SKIP_LLVM_SRC_BUILD_INSTALL=yes
INDUCTOR_BENCHMARKS=yes
;;
*)
# Catch-all for builds that are not hardcoded.
PROTOBUF=yes

View File

@ -1 +1 @@
48da61aa34b73ea8e2ee815a6a79eea817e361db
c572f9e509b5ec5d56f4d218271e36269bba244f

View File

@ -1,5 +0,0 @@
0.6b
manylinux_2_17
rocm6.1
04b5df8c8123f90cba3ede7e971e6fbc6040d506
77c29fa3f3b614e187d7213d745e989a92708cee2bc6020419ab49019af399d1

View File

@ -85,7 +85,7 @@ fi
else
CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.13" ]; then
if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then
conda_install numpy=1.26.0 ${CONDA_COMMON_DEPS}
else
conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}

View File

@ -1,20 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -ex
# Anaconda
# Latest anaconda is using openssl-3 which is incompatible with all currently published versions of git
# Which are using openssl-1.1.1, see https://anaconda.org/anaconda/git/files?version=2.40.1 for example
MINICONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Linux-x86_64.sh
wget -q $MINICONDA_URL
# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
bash $(basename "$MINICONDA_URL") -b -p /opt/conda
rm $(basename "$MINICONDA_URL")
export PATH=/opt/conda/bin:$PATH
# See https://github.com/pytorch/builder/issues/1473
# Pin conda to 23.5.2 as it's the last one compatible with openssl-1.1.1
conda install -y conda=23.5.2 conda-build anaconda-client git ninja
# The cmake version here needs to match with the minimum version of cmake
# supported by PyTorch (3.18). There is only 3.18.2 on anaconda
/opt/conda/bin/pip3 install cmake==3.18.2
conda remove -y --force patchelf

View File

@ -1,95 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -uex -o pipefail
PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python
PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/heads
GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
# Python versions to be installed in /opt/$VERSION_NO
CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0"}
function check_var {
if [ -z "$1" ]; then
echo "required variable not defined"
exit 1
fi
}
function do_cpython_build {
local py_ver=$1
local py_folder=$2
check_var $py_ver
check_var $py_folder
tar -xzf Python-$py_ver.tgz
pushd $py_folder
local prefix="/opt/_internal/cpython-${py_ver}"
mkdir -p ${prefix}/lib
if [[ -n $(which patchelf) ]]; then
local shared_flags="--enable-shared"
else
local shared_flags="--disable-shared"
fi
if [[ -z "${WITH_OPENSSL+x}" ]]; then
local openssl_flags=""
else
local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto"
fi
# -Wformat added for https://bugs.python.org/issue17547 on Python 2.6
CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} > /dev/null
make -j40 > /dev/null
make install > /dev/null
if [[ "${shared_flags}" == "--enable-shared" ]]; then
patchelf --set-rpath '$ORIGIN/../lib' ${prefix}/bin/python3
fi
popd
rm -rf $py_folder
# Some python's install as bin/python3. Make them available as
# bin/python.
if [ -e ${prefix}/bin/python3 ]; then
ln -s python3 ${prefix}/bin/python
fi
${prefix}/bin/python get-pip.py
if [ -e ${prefix}/bin/pip3 ] && [ ! -e ${prefix}/bin/pip ]; then
ln -s pip3 ${prefix}/bin/pip
fi
${prefix}/bin/pip install wheel==0.34.2
local abi_tag=$(${prefix}/bin/python -c "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag; print('{0}{1}-{2}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))")
ln -s ${prefix} /opt/python/${abi_tag}
}
function build_cpython {
local py_ver=$1
check_var $py_ver
check_var $PYTHON_DOWNLOAD_URL
local py_ver_folder=$py_ver
if [ "$py_ver" = "3.13.0" ]; then
PY_VER_SHORT="3.13"
check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH
wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz
do_cpython_build $py_ver cpython-$PY_VER_SHORT
else
wget -q $PYTHON_DOWNLOAD_URL/$py_ver_folder/Python-$py_ver.tgz
do_cpython_build $py_ver Python-$py_ver
fi
rm -f Python-$py_ver.tgz
}
function build_cpythons {
check_var $GET_PIP_URL
curl -sLO $GET_PIP_URL
for py_ver in $@; do
build_cpython $py_ver
done
rm -f get-pip.py
}
mkdir -p /opt/python
mkdir -p /opt/_internal
build_cpythons $CPYTHON_VERSIONS

View File

@ -1,239 +0,0 @@
#!/bin/bash
set -ex
NCCL_VERSION=v2.21.5-1
CUDNN_VERSION=9.1.0.70
function install_cusparselt_040 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_cusparselt_052 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_118 {
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
# install CUDA 11.8.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
chmod +x cuda_11.8.0_520.61.05_linux.run
./cuda_11.8.0_520.61.05_linux.run --toolkit --silent
rm -f cuda_11.8.0_520.61.05_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.8 /usr/local/cuda
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl
install_cusparselt_040
ldconfig
}
function install_121 {
echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.1 /usr/local/cuda
# install CUDA 12.1.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
chmod +x cuda_12.1.1_530.30.02_linux.run
./cuda_12.1.1_530.30.02_linux.run --toolkit --silent
rm -f cuda_12.1.1_530.30.02_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl
install_cusparselt_052
ldconfig
}
function install_124 {
echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
chmod +x cuda_12.4.0_550.54.14_linux.run
./cuda_12.4.0_550.54.14_linux.run --toolkit --silent
rm -f cuda_12.4.0_550.54.14_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl
install_cusparselt_052
ldconfig
}
function prune_118 {
echo "Pruning CUDA 11.8 and cuDNN"
#####################################################################################
# CUDA 11.8 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-11.8/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-11.8/lib64"
export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
# all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included)
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 11.8 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-11.8/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
}
function prune_121 {
echo "Pruning CUDA 12.1"
#####################################################################################
# CUDA 12.1 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.1 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.1/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/
}
function prune_124 {
echo "Pruning CUDA 12.4"
#####################################################################################
# CUDA 12.4 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.1 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}
# idiomatic parameter and option handling in sh
while test $# -gt 0
do
case "$1" in
11.8) install_118; prune_118
;;
12.1) install_121; prune_121
;;
12.4) install_124; prune_124
;;
*) echo "bad argument $1"; exit 1
;;
esac
shift
done

View File

@ -1,93 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -ex
NCCL_VERSION=v2.21.5-1
function install_cusparselt_052 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_124 {
echo "Installing CUDA 12.4 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run
chmod +x cuda_12.4.0_550.54.14_linux_sbsa.run
./cuda_12.4.0_550.54.14_linux_sbsa.run --toolkit --silent
rm -f cuda_12.4.0_550.54.14_linux_sbsa.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz -O cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz
tar xf cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz
cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl
install_cusparselt_052
ldconfig
}
function prune_124 {
echo "Pruning CUDA 12.4"
#####################################################################################
# CUDA 12.4 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.1 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}
# idiomatic parameter and option handling in sh
while test $# -gt 0
do
case "$1" in
12.4) install_124; prune_124
;;
*) echo "bad argument $1"; exit 1
;;
esac
shift
done

View File

@ -1,23 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -ex
LIBPNG_VERSION=1.6.37
mkdir -p libpng
pushd libpng
wget http://download.sourceforge.net/libpng/libpng-$LIBPNG_VERSION.tar.gz
tar -xvzf libpng-$LIBPNG_VERSION.tar.gz
pushd libpng-$LIBPNG_VERSION
./configure
make
make install
popd
popd
rm -rf libpng

View File

@ -1,29 +0,0 @@
#!/usr/bin/env bash
# Script used only in CD pipeline
set -eou pipefail
MAGMA_VERSION="2.5.2"
function do_install() {
cuda_version=$1
cuda_version_nodot=${1/./}
MAGMA_VERSION="2.6.1"
magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
cuda_dir="/usr/local/cuda-${cuda_version}"
(
set -x
tmp_dir=$(mktemp -d)
pushd ${tmp_dir}
curl -OLs https://anaconda.org/pytorch/magma-cuda${cuda_version_nodot}/${MAGMA_VERSION}/download/linux-64/${magma_archive}
tar -xvf "${magma_archive}"
mkdir -p "${cuda_dir}/magma"
mv include "${cuda_dir}/magma/include"
mv lib "${cuda_dir}/magma/lib"
popd
)
}
do_install $1

View File

@ -1,134 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -ex
ROCM_VERSION=$1
if [[ -z $ROCM_VERSION ]]; then
echo "missing ROCM_VERSION"
exit 1;
fi
# To make version comparison easier, create an integer representation.
save_IFS="$IFS"
IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION})
IFS="$save_IFS"
if [[ ${#ROCM_VERSION_ARRAY[@]} == 2 ]]; then
ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
ROCM_VERSION_PATCH=0
elif [[ ${#ROCM_VERSION_ARRAY[@]} == 3 ]]; then
ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
ROCM_VERSION_PATCH=${ROCM_VERSION_ARRAY[2]}
else
echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
exit 1
fi
ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
# Install custom MIOpen + COMgr for ROCm >= 4.0.1
if [[ $ROCM_INT -lt 40001 ]]; then
echo "ROCm version < 4.0.1; will not install custom MIOpen"
exit 0
fi
# Function to retry functions that sometimes timeout or have flaky failures
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
# Build custom MIOpen to use comgr for offline compilation.
## Need a sanitized ROCM_VERSION without patchlevel; patchlevel version 0 must be added to paths.
ROCM_DOTS=$(echo ${ROCM_VERSION} | tr -d -c '.' | wc -c)
if [[ ${ROCM_DOTS} == 1 ]]; then
ROCM_VERSION_NOPATCH="${ROCM_VERSION}"
ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}.0"
else
ROCM_VERSION_NOPATCH="${ROCM_VERSION%.*}"
ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}"
fi
# MIOPEN_USE_HIP_KERNELS is a Workaround for COMgr issues
MIOPEN_CMAKE_COMMON_FLAGS="
-DMIOPEN_USE_COMGR=ON
-DMIOPEN_BUILD_DRIVER=OFF
"
# Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
if [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
exit 0
elif [[ $ROCM_INT -ge 60000 ]] && [[ $ROCM_INT -lt 60100 ]]; then
echo "ROCm 6.0 MIOpen does not need any patches, do not build from source"
exit 0
elif [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 60000 ]]; then
echo "ROCm 5.7 MIOpen does not need any patches, do not build from source"
exit 0
elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then
MIOPEN_BRANCH="release/rocm-rel-5.6-staging"
elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then
MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11"
elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
MIOPEN_BRANCH="release/rocm-rel-5.4-staging"
elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
MIOPEN_BRANCH="release/rocm-rel-5.3-staging"
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50300 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
MIOPEN_BRANCH="release/rocm-rel-5.2-staging"
elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
MIOPEN_BRANCH="release/rocm-rel-5.1-staging"
elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
MIOPEN_BRANCH="release/rocm-rel-5.0-staging"
else
echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
exit 1
fi
yum remove -y miopen-hip
git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
pushd MIOpen
# remove .git to save disk space since CI runner was running out
rm -rf .git
# Don't build MLIR to save docker build time
# since we are disabling MLIR backend for MIOpen anyway
if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
sed -i '/rocMLIR/d' requirements.txt
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then
sed -i '/llvm-project-mlir/d' requirements.txt
fi
## MIOpen minimum requirements
cmake -P install_deps.cmake --minimum
# clean up since CI runner was running out of disk space
rm -rf /tmp/*
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
## Build MIOpen
mkdir -p build
cd build
PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang++ cmake .. \
${MIOPEN_CMAKE_COMMON_FLAGS} \
${MIOPEN_CMAKE_DB_FLAGS} \
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
make MIOpen -j $(nproc)
# Build MIOpen package
make -j $(nproc) package
# clean up since CI runner was running out of disk space
rm -rf /usr/local/cget
yum install -y miopen-*.rpm
popd
rm -rf MIOpen

View File

@ -1,16 +0,0 @@
#!/bin/bash
set -ex
# MKL
MKL_VERSION=2024.2.0
MKLROOT=/opt/intel
mkdir -p ${MKLROOT}
pushd /tmp
python3 -mpip install wheel
python3 -mpip download -d . mkl-static==${MKL_VERSION}
python3 -m wheel unpack mkl_static-${MKL_VERSION}-py2.py3-none-manylinux1_x86_64.whl
python3 -m wheel unpack mkl_include-${MKL_VERSION}-py2.py3-none-manylinux1_x86_64.whl
mv mkl_static-${MKL_VERSION}/mkl_static-${MKL_VERSION}.data/data/lib ${MKLROOT}
mv mkl_include-${MKL_VERSION}/mkl_include-${MKL_VERSION}.data/data/include ${MKLROOT}

View File

@ -1,13 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -ex
mkdir -p /usr/local/mnist/
cd /usr/local/mnist
for img in train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz; do
wget -q https://ossci-datasets.s3.amazonaws.com/mnist/$img
gzip -d $img
done

View File

@ -1,22 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -ex
cd /
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.25 --depth 1 --shallow-submodules
OPENBLAS_BUILD_FLAGS="
NUM_THREADS=128
USE_OPENMP=1
NO_SHARED=0
DYNAMIC_ARCH=1
TARGET=ARMV8
CFLAGS=-O3
"
OPENBLAS_CHECKOUT_DIR="OpenBLAS"
make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR}
make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR}

View File

@ -1,16 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
set -ex
# Pin the version to latest release 0.17.2, building newer commit starts
# to fail on the current image
git clone -b 0.17.2 --single-branch https://github.com/NixOS/patchelf
cd patchelf
sed -i 's/serial/parallel/g' configure.ac
./bootstrap.sh
./configure
make
make install
cd ..
rm -rf patchelf

View File

@ -1,150 +0,0 @@
#!/bin/bash
# Script used only in CD pipeline
###########################
### prereqs
###########################
# Install Python packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
apt-get update -y
apt-get install -y libpciaccess-dev pkg-config
apt-get clean
;;
centos)
yum install -y libpciaccess-devel pkgconfig
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
python3 -m pip install meson ninja
###########################
### clone repo
###########################
GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
pushd drm
###########################
### patch
###########################
patch -p1 <<'EOF'
diff --git a/amdgpu/amdgpu_asic_id.c b/amdgpu/amdgpu_asic_id.c
index a5007ffc..13fa07fc 100644
--- a/amdgpu/amdgpu_asic_id.c
+++ b/amdgpu/amdgpu_asic_id.c
@@ -22,6 +22,13 @@
*
*/
+#define _XOPEN_SOURCE 700
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+#include <ftw.h>
+#include <link.h>
+#include <limits.h>
+
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -34,6 +41,19 @@
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
+static char *amdgpuids_path = NULL;
+static const char* amdgpuids_path_msg = NULL;
+
+static int check_for_location_of_amdgpuids(const char *filepath, const struct stat *info, const int typeflag, struct FTW *pathinfo)
+{
+ if (typeflag == FTW_F && strstr(filepath, "amdgpu.ids")) {
+ amdgpuids_path = strdup(filepath);
+ return 1;
+ }
+
+ return 0;
+}
+
static int parse_one_line(struct amdgpu_device *dev, const char *line)
{
char *buf, *saveptr;
@@ -113,10 +133,46 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
int line_num = 1;
int r = 0;
+ // attempt to find typical location for amdgpu.ids file
fp = fopen(AMDGPU_ASIC_ID_TABLE, "r");
+
+ // if it doesn't exist, search
+ if (!fp) {
+
+ char self_path[ PATH_MAX ];
+ ssize_t count;
+ ssize_t i;
+
+ count = readlink( "/proc/self/exe", self_path, PATH_MAX );
+ if (count > 0) {
+ self_path[count] = '\0';
+
+ // remove '/bin/python' from self_path
+ for (i=count; i>0; --i) {
+ if (self_path[i] == '/') break;
+ self_path[i] = '\0';
+ }
+ self_path[i] = '\0';
+ for (; i>0; --i) {
+ if (self_path[i] == '/') break;
+ self_path[i] = '\0';
+ }
+ self_path[i] = '\0';
+
+ if (1 == nftw(self_path, check_for_location_of_amdgpuids, 5, FTW_PHYS)) {
+ fp = fopen(amdgpuids_path, "r");
+ amdgpuids_path_msg = amdgpuids_path;
+ }
+ }
+
+ }
+ else {
+ amdgpuids_path_msg = AMDGPU_ASIC_ID_TABLE;
+ }
+
+ // both hard-coded location and search have failed
if (!fp) {
- fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
- strerror(errno));
+ fprintf(stderr, "amdgpu.ids: No such file or directory\n");
return;
}
@@ -132,7 +188,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
continue;
}
- drmMsg("%s version: %s\n", AMDGPU_ASIC_ID_TABLE, line);
+ drmMsg("%s version: %s\n", amdgpuids_path_msg, line);
break;
}
@@ -150,7 +206,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
if (r == -EINVAL) {
fprintf(stderr, "Invalid format: %s: line %d: %s\n",
- AMDGPU_ASIC_ID_TABLE, line_num, line);
+ amdgpuids_path_msg, line_num, line);
} else if (r && r != -EAGAIN) {
fprintf(stderr, "%s: Cannot parse ASIC IDs: %s\n",
__func__, strerror(-r));
EOF
###########################
### build
###########################
meson builddir --prefix=/opt/amdgpu
pushd builddir
ninja install
popd
popd

View File

@ -1,11 +1,7 @@
#!/bin/bash
# Script used in CI and CD pipeline
set -ex
MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}
# "install" hipMAGMA into /opt/rocm/magma by copying after build
git clone https://bitbucket.org/icl/magma.git
pushd magma
@ -15,10 +11,7 @@ git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then
echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc
fi
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
export PATH="${PATH}:/opt/rocm/bin"
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
@ -32,7 +25,7 @@ done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
make -f make.gen.hipMAGMA -j $(nproc)
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}"
make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}"
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION
make testing/testing_dgemm -j $(nproc) MKLROOT=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION
popd
mv magma /opt/rocm

View File

@ -1,6 +1,6 @@
#!/bin/bash
set -xe
# Script used in CI and CD pipeline
# Intel® software for general purpose GPU capabilities.
# Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
@ -8,23 +8,19 @@ set -xe
# Users should update to the latest version as it becomes available
function install_ubuntu() {
. /etc/os-release
if [[ ! " jammy " =~ " ${VERSION_CODENAME} " ]]; then
echo "Ubuntu version ${VERSION_CODENAME} not supported"
exit
fi
apt-get update -y
apt-get install -y gpg-agent wget
# To add the online network package repository for the GPU Driver LTS releases
# Set up the repository. To do this, download the key to the system keyring
wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
| gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
| gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor --output /usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg
# Add the signed entry to APT sources and configure the APT client to use the Intel repository
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] \
https://repositories.intel.com/gpu/ubuntu ${VERSION_CODENAME}/lts/2350 unified" \
| tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list
# To add the online network network package repository for the Intel Support Packages
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor > /usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg
https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" \
| tee /etc/apt/sources.list.d/intel-gpu-jammy.list
echo "deb [signed-by=/usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg] \
https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" \
| tee /etc/apt/sources.list.d/intel-for-pytorch-gpu-dev.list
@ -101,86 +97,6 @@ EOF
rm -rf /var/lib/yum/history
}
function install_rhel() {
. /etc/os-release
if [[ "${ID}" == "rhel" ]]; then
if [[ ! " 8.6 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
echo "RHEL version ${VERSION_ID} not supported"
exit
fi
elif [[ "${ID}" == "almalinux" ]]; then
# Workaround for almalinux8 which used by quay.io/pypa/manylinux_2_28_x86_64
VERSION_ID="8.6"
fi
dnf install -y 'dnf-command(config-manager)'
# To add the online network package repository for the GPU Driver LTS releases
dnf config-manager --add-repo \
https://repositories.intel.com/gpu/rhel/${VERSION_ID}/lts/2350/unified/intel-gpu-${VERSION_ID}.repo
# To add the online network network package repository for the Intel Support Packages
tee > /etc/yum.repos.d/intel-for-pytorch-gpu-dev.repo << EOF
[intel-for-pytorch-gpu-dev]
name=Intel for Pytorch GPU dev repository
baseurl=https://yum.repos.intel.com/intel-for-pytorch-gpu-dev
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
EOF
# The xpu-smi packages
dnf install -y xpu-smi
# Compute and Media Runtimes
dnf install -y \
intel-opencl intel-media intel-mediasdk libmfxgen1 libvpl2\
level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
mesa-libxatracker libvpl-tools intel-metrics-discovery \
intel-metrics-library intel-igc-core intel-igc-cm \
libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc
# Development packages
dnf install -y --refresh \
intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
level-zero-devel
# Install Intel Support Packages
yum install -y intel-for-pytorch-gpu-dev intel-pti-dev
# Cleanup
dnf clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}
function install_sles() {
. /etc/os-release
VERSION_SP=${VERSION_ID//./sp}
if [[ ! " 15sp4 15sp5 " =~ " ${VERSION_SP} " ]]; then
echo "SLES version ${VERSION_ID} not supported"
exit
fi
# To add the online network package repository for the GPU Driver LTS releases
zypper addrepo -f -r \
https://repositories.intel.com/gpu/sles/${VERSION_SP}/lts/2350/unified/intel-gpu-${VERSION_SP}.repo
rpm --import https://repositories.intel.com/gpu/intel-graphics.key
# To add the online network network package repository for the Intel Support Packages
zypper addrepo https://yum.repos.intel.com/intel-for-pytorch-gpu-dev intel-for-pytorch-gpu-dev
rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
# The xpu-smi packages
zypper install -y lsb-release flex bison xpu-smi
# Compute and Media Runtimes
zypper install -y intel-level-zero-gpu level-zero intel-gsc intel-opencl intel-ocloc \
intel-media-driver libigfxcmrt7 libvpl2 libvpl-tools libmfxgen1 libmfx1
# Development packages
zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel
# Install Intel Support Packages
zypper install -y intel-for-pytorch-gpu-dev intel-pti-dev
}
# The installation depends on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
@ -191,12 +107,6 @@ case "$ID" in
centos)
install_centos
;;
rhel|almalinux)
install_rhel
;;
sles)
install_sles
;;
*)
echo "Unable to determine OS..."
exit 1

View File

@ -1,101 +0,0 @@
ARG CUDA_VERSION=10.2
ARG BASE_TARGET=cuda${CUDA_VERSION}
FROM centos:7 as base
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
ARG DEVTOOLSET_VERSION=9
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum update -y
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which unzip
# Just add everything as a safe.directory for git since these will be used in multiple places with git
RUN git config --global --add safe.directory '*'
RUN yum install -y yum-utils centos-release-scl
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran devtoolset-${DEVTOOLSET_VERSION}-binutils
# EPEL for cmake
RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
rpm -ivh epel-release-latest-7.noarch.rpm && \
rm -f epel-release-latest-7.noarch.rpm
# cmake
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
RUN yum install -y autoconf aclocal automake make sudo
RUN rm -rf /usr/local/cuda-*
FROM base as patchelf
# Install patchelf
ADD ./common/install_patchelf.sh install_patchelf.sh
RUN bash ./install_patchelf.sh && rm install_patchelf.sh && cp $(which patchelf) /patchelf
FROM base as openssl
# Install openssl
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
FROM base as conda
# Install Anaconda
ADD ./common/install_conda_docker.sh install_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh
# Install CUDA
FROM base as cuda
ARG CUDA_VERSION=10.2
RUN rm -rf /usr/local/cuda-*
ADD ./common/install_cuda.sh install_cuda.sh
ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
# Preserve CUDA_VERSION for the builds
ENV CUDA_VERSION=${CUDA_VERSION}
# Make things in our path by default
ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:$PATH
FROM cuda as cuda11.8
RUN bash ./install_cuda.sh 11.8
ENV DESIRED_CUDA=11.8
FROM cuda as cuda12.1
RUN bash ./install_cuda.sh 12.1
ENV DESIRED_CUDA=12.1
FROM cuda as cuda12.4
RUN bash ./install_cuda.sh 12.4
ENV DESIRED_CUDA=12.4
# Install MNIST test data
FROM base as mnist
ADD ./common/install_mnist.sh install_mnist.sh
RUN bash ./install_mnist.sh
FROM base as all_cuda
COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
COPY --from=cuda12.4 /usr/local/cuda-12.4 /usr/local/cuda-12.4
# Final step
FROM ${BASE_TARGET} as final
COPY --from=openssl /opt/openssl /opt/openssl
COPY --from=patchelf /patchelf /usr/local/bin/patchelf
COPY --from=conda /opt/conda /opt/conda
# Add jni.h for java host build.
COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
ENV PATH /opt/conda/bin:$PATH
COPY --from=mnist /usr/local/mnist /usr/local/mnist
RUN rm -rf /usr/local/cuda
RUN chmod o+rw /usr/local
RUN touch /.condarc && \
chmod o+rw /.condarc && \
chmod -R o+rw /opt/conda

View File

@ -1,76 +0,0 @@
#!/usr/bin/env bash
# Script used only in CD pipeline
set -eou pipefail
image="$1"
shift
if [ -z "${image}" ]; then
echo "Usage: $0 IMAGE"
exit 1
fi
DOCKER_IMAGE_NAME="pytorch/${image}"
export DOCKER_BUILDKIT=1
TOPDIR=$(git rev-parse --show-toplevel)
CUDA_VERSION=${CUDA_VERSION:-12.1}
case ${CUDA_VERSION} in
cpu)
BASE_TARGET=base
DOCKER_TAG=cpu
;;
all)
BASE_TARGET=all_cuda
DOCKER_TAG=latest
;;
*)
BASE_TARGET=cuda${CUDA_VERSION}
DOCKER_TAG=cuda${CUDA_VERSION}
;;
esac
(
set -x
docker build \
--target final \
--progress plain \
--build-arg "BASE_TARGET=${BASE_TARGET}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "DEVTOOLSET_VERSION=9" \
-t ${DOCKER_IMAGE_NAME} \
$@ \
-f "${TOPDIR}/.ci/docker/conda/Dockerfile" \
${TOPDIR}/.ci/docker/
)
if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then
# Test that we're using the right CUDA compiler
(
set -x
docker run --rm "${DOCKER_IMAGE_NAME}" nvcc --version | grep "cuda_${CUDA_VERSION}"
)
fi
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE_NAME}-${GIT_BRANCH_NAME}
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE_NAME}-${GIT_COMMIT_SHA}
if [[ "${WITH_PUSH:-}" == true ]]; then
(
set -x
docker push "${DOCKER_IMAGE_NAME}"
if [[ -n ${GITHUB_REF} ]]; then
docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_BRANCH_TAG}
docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_SHA_TAG}
docker push "${DOCKER_IMAGE_BRANCH_TAG}"
docker push "${DOCKER_IMAGE_SHA_TAG}"
fi
)
fi

View File

@ -1,107 +0,0 @@
ARG BASE_TARGET=base
ARG GPU_IMAGE=ubuntu:20.04
FROM ${GPU_IMAGE} as base
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get clean && apt-get update
RUN apt-get install -y curl locales g++ git-all autoconf automake make cmake wget unzip sudo
# Just add everything as a safe.directory for git since these will be used in multiple places with git
RUN git config --global --add safe.directory '*'
RUN locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
# Install openssl
FROM base as openssl
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
# Install python
FROM base as python
ADD common/install_cpython.sh install_cpython.sh
RUN apt-get update -y && \
apt-get install build-essential gdb lcov libbz2-dev libffi-dev \
libgdbm-dev liblzma-dev libncurses5-dev libreadline6-dev \
libsqlite3-dev libssl-dev lzma lzma-dev tk-dev uuid-dev zlib1g-dev -y && \
bash ./install_cpython.sh && \
rm install_cpython.sh && \
apt-get clean
FROM base as conda
ADD ./common/install_conda_docker.sh install_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh
FROM base as cpu
# Install Anaconda
COPY --from=conda /opt/conda /opt/conda
# Install python
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
ENV PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
# Install MKL
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM cpu as cuda
ADD ./common/install_cuda.sh install_cuda.sh
ADD ./common/install_magma.sh install_magma.sh
ENV CUDA_HOME /usr/local/cuda
FROM cuda as cuda11.8
RUN bash ./install_cuda.sh 11.8
RUN bash ./install_magma.sh 11.8
RUN ln -sf /usr/local/cuda-11.8 /usr/local/cuda
FROM cuda as cuda12.1
RUN bash ./install_cuda.sh 12.1
RUN bash ./install_magma.sh 12.1
RUN ln -sf /usr/local/cuda-12.1 /usr/local/cuda
FROM cuda as cuda12.4
RUN bash ./install_cuda.sh 12.4
RUN bash ./install_magma.sh 12.4
RUN ln -sf /usr/local/cuda-12.4 /usr/local/cuda
FROM cpu as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
ENV MKLROOT /opt/intel
# Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0)
# find HIP works for ROCm5.7. Not needed for ROCm6.0 and above.
# Remove below when ROCm5.7 is not in support matrix anymore.
ENV ROCM_PATH /opt/rocm
# No need to install ROCm as base docker image should have full ROCm install
#ADD ./common/install_rocm.sh install_rocm.sh
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
# gfortran and python needed for building magma from source for ROCm
RUN apt-get update -y && \
apt-get install gfortran -y && \
apt-get install python -y && \
apt-get clean
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
# Install AOTriton
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/aotriton_version.txt aotriton_version.txt
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
FROM ${BASE_TARGET} as final
COPY --from=openssl /opt/openssl /opt/openssl
# Install patchelf
ADD ./common/install_patchelf.sh install_patchelf.sh
RUN bash ./install_patchelf.sh && rm install_patchelf.sh
# Install Anaconda
COPY --from=conda /opt/conda /opt/conda
# Install python
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
ENV PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH

View File

@ -1,93 +0,0 @@
#!/usr/bin/env bash
# Script used only in CD pipeline
set -eou pipefail
image="$1"
shift
if [ -z "${image}" ]; then
echo "Usage: $0 IMAGE"
exit 1
fi
DOCKER_IMAGE="pytorch/${image}"
TOPDIR=$(git rev-parse --show-toplevel)
GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
WITH_PUSH=${WITH_PUSH:-}
DOCKER=${DOCKER:-docker}
case ${GPU_ARCH_TYPE} in
cpu)
BASE_TARGET=cpu
DOCKER_TAG=cpu
GPU_IMAGE=ubuntu:20.04
DOCKER_GPU_BUILD_ARG=""
;;
cuda)
BASE_TARGET=cuda${GPU_ARCH_VERSION}
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
GPU_IMAGE=ubuntu:20.04
DOCKER_GPU_BUILD_ARG=""
;;
rocm)
BASE_TARGET=rocm
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
else
echo "ERROR: rocm regex failed"
exit 1
fi
if [[ $ROCM_VERSION_INT -ge 60000 ]]; then
PYTORCH_ROCM_ARCH+=";gfx942"
fi
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
;;
*)
echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
exit 1
;;
esac
(
set -x
DOCKER_BUILDKIT=1 ${DOCKER} build \
--target final \
${DOCKER_GPU_BUILD_ARG} \
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
--build-arg "BASE_TARGET=${BASE_TARGET}" \
-t "${DOCKER_IMAGE}" \
$@ \
-f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
"${TOPDIR}/.ci/docker/"
)
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}
if [[ "${WITH_PUSH}" == true ]]; then
(
set -x
${DOCKER} push "${DOCKER_IMAGE}"
if [[ -n ${GITHUB_REF} ]]; then
${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
${DOCKER} push "${DOCKER_IMAGE_BRANCH_TAG}"
${DOCKER} push "${DOCKER_IMAGE_SHA_TAG}"
fi
)
fi

View File

@ -1,203 +0,0 @@
# syntax = docker/dockerfile:experimental
ARG ROCM_VERSION=3.7
ARG BASE_CUDA_VERSION=11.8
ARG GPU_IMAGE=centos:7
FROM centos:7 as base
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
ARG DEVTOOLSET_VERSION=9
# Note: This is required patch since CentOS have reached EOL
# otherwise any yum install setp will fail
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel
# Just add everything as a safe.directory for git since these will be used in multiple places with git
RUN git config --global --add safe.directory '*'
RUN yum install -y yum-utils centos-release-scl
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
# Note: After running yum-config-manager --enable rhel-server-rhscl-7-rpms
# patch is required once again. Somehow this steps adds mirror.centos.org
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran devtoolset-${DEVTOOLSET_VERSION}-binutils
ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm && \
rpm -ivh epel-release-latest-7.noarch.rpm && \
rm -f epel-release-latest-7.noarch.rpm
# cmake-3.18.4 from pip
RUN yum install -y python3-pip && \
python3 -mpip install cmake==3.18.4 && \
ln -s /usr/local/bin/cmake /usr/bin/cmake
RUN yum install -y autoconf aclocal automake make sudo
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
# EPEL for cmake
FROM base as patchelf
# Install patchelf
ADD ./common/install_patchelf.sh install_patchelf.sh
RUN bash ./install_patchelf.sh && rm install_patchelf.sh
RUN cp $(which patchelf) /patchelf
FROM patchelf as python
# build python
COPY manywheel/build_scripts /build_scripts
ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh
RUN bash build_scripts/build.sh && rm -r build_scripts
FROM base as cuda
ARG BASE_CUDA_VERSION=10.2
# Install CUDA
ADD ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
FROM base as intel
# MKL
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM base as magma
ARG BASE_CUDA_VERSION=10.2
# Install magma
ADD ./common/install_magma.sh install_magma.sh
RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
FROM base as jni
# Install java jni header
ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
FROM base as libpng
# Install libpng
ADD ./common/install_libpng.sh install_libpng.sh
RUN bash ./install_libpng.sh && rm install_libpng.sh
FROM ${GPU_IMAGE} as common
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
RUN yum install -y \
aclocal \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
yasm
RUN yum install -y \
https://repo.ius.io/ius-release-el7.rpm \
https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
RUN yum swap -y git git236-core
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
# Install LLVM version
COPY --from=openssl /opt/openssl /opt/openssl
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
COPY --from=python /opt/python/cp39-cp39/bin/auditwheel /usr/local/bin/auditwheel
COPY --from=intel /opt/intel /opt/intel
COPY --from=patchelf /usr/local/bin/patchelf /usr/local/bin/patchelf
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
COPY --from=libpng /usr/local/include/png* /usr/local/include/
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
FROM common as cpu_final
ARG BASE_CUDA_VERSION=10.1
ARG DEVTOOLSET_VERSION=9
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y yum-utils centos-release-scl
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran devtoolset-${DEVTOOLSET_VERSION}-binutils
ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# cmake is already installed inside the rocm base image, so remove if present
RUN rpm -e cmake || true
# cmake-3.18.4 from pip
RUN yum install -y python3-pip && \
python3 -mpip install cmake==3.18.4 && \
ln -s /usr/local/bin/cmake /usr/bin/cmake
# ninja
RUN yum install -y ninja-build
FROM cpu_final as cuda_final
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
ENV PATH=/usr/local/cuda/bin:$PATH
FROM cpu_final as rocm_final
ARG ROCM_VERSION=3.7
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
# Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0)
# find HIP works for ROCm5.7. Not needed for ROCm6.0 and above.
# Remove below when ROCm5.7 is not in support matrix anymore.
ENV ROCM_PATH /opt/rocm
ENV MKLROOT /opt/intel
# No need to install ROCm as base docker image should have full ROCm install
#ADD ./common/install_rocm.sh install_rocm.sh
#RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
# cmake3 is needed for the MIOpen build
RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
# Install AOTriton
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/aotriton_version.txt aotriton_version.txt
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton

View File

@ -1,152 +0,0 @@
# syntax = docker/dockerfile:experimental
ARG ROCM_VERSION=3.7
ARG BASE_CUDA_VERSION=10.2
ARG GPU_IMAGE=nvidia/cuda:${BASE_CUDA_VERSION}-devel-centos7
FROM quay.io/pypa/manylinux2014_x86_64 as base
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel
RUN yum install -y yum-utils centos-release-scl sudo
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
# cmake
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
# remove unncessary python versions
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
FROM base as cuda
ARG BASE_CUDA_VERSION=10.2
# Install CUDA
ADD ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
FROM base as intel
# MKL
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM base as magma
ARG BASE_CUDA_VERSION=10.2
# Install magma
ADD ./common/install_magma.sh install_magma.sh
RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
FROM base as jni
# Install java jni header
ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
FROM base as libpng
# Install libpng
ADD ./common/install_libpng.sh install_libpng.sh
RUN bash ./install_libpng.sh && rm install_libpng.sh
FROM ${GPU_IMAGE} as common
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
RUN yum install -y \
aclocal \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
yasm
RUN yum install -y \
https://repo.ius.io/ius-release-el7.rpm \
https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
RUN yum swap -y git git236-core
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
# Install LLVM version
COPY --from=openssl /opt/openssl /opt/openssl
COPY --from=base /opt/python /opt/python
COPY --from=base /opt/_internal /opt/_internal
COPY --from=base /usr/local/bin/auditwheel /usr/local/bin/auditwheel
COPY --from=intel /opt/intel /opt/intel
COPY --from=base /usr/local/bin/patchelf /usr/local/bin/patchelf
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
COPY --from=libpng /usr/local/include/png* /usr/local/include/
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
FROM common as cpu_final
ARG BASE_CUDA_VERSION=10.2
RUN yum install -y yum-utils centos-release-scl
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
# cmake
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
# ninja
RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-1.noarch.rpm
RUN yum install -y ninja-build
FROM cpu_final as cuda_final
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
FROM common as rocm_final
ARG ROCM_VERSION=3.7
# Install ROCm
ADD ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
# cmake3 is needed for the later MIOpen custom build, so that step is last.
RUN yum install -y cmake3 && \
rm -f /usr/bin/cmake && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh

View File

@ -1,153 +0,0 @@
# syntax = docker/dockerfile:experimental
ARG ROCM_VERSION=3.7
ARG BASE_CUDA_VERSION=11.8
ARG GPU_IMAGE=amd64/almalinux:8
FROM quay.io/pypa/manylinux_2_28_x86_64 as base
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
ARG DEVTOOLSET_VERSION=11
RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel yum-utils gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# cmake-3.18.4 from pip
RUN yum install -y python3-pip && \
python3 -mpip install cmake==3.18.4 && \
ln -s /usr/local/bin/cmake /usr/bin/cmake3
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
# remove unncessary python versions
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
FROM base as cuda
ARG BASE_CUDA_VERSION=11.8
# Install CUDA
ADD ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
FROM base as intel
# MKL
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM base as magma
ARG BASE_CUDA_VERSION=10.2
# Install magma
ADD ./common/install_magma.sh install_magma.sh
RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
FROM base as jni
# Install java jni header
ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
FROM base as libpng
# Install libpng
ADD ./common/install_libpng.sh install_libpng.sh
RUN bash ./install_libpng.sh && rm install_libpng.sh
FROM ${GPU_IMAGE} as common
ARG DEVTOOLSET_VERSION=11
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
RUN yum -y install epel-release
RUN yum -y update
RUN yum install -y \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
gcc-toolset-${DEVTOOLSET_VERSION}-toolchain \
glibc-langpack-en
RUN yum install -y \
https://repo.ius.io/ius-release-el7.rpm \
https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
RUN yum swap -y git git236-core
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
# Install LLVM version
COPY --from=openssl /opt/openssl /opt/openssl
COPY --from=base /opt/python /opt/python
COPY --from=base /opt/_internal /opt/_internal
COPY --from=base /usr/local/bin/auditwheel /usr/local/bin/auditwheel
COPY --from=intel /opt/intel /opt/intel
COPY --from=base /usr/local/bin/patchelf /usr/local/bin/patchelf
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
COPY --from=libpng /usr/local/include/png* /usr/local/include/
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
FROM common as cpu_final
ARG BASE_CUDA_VERSION=11.8
ARG DEVTOOLSET_VERSION=11
# Ensure the expected devtoolset is used
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# cmake-3.18.4 from pip
RUN yum install -y python3-pip && \
python3 -mpip install cmake==3.18.4 && \
ln -s /usr/local/bin/cmake /usr/bin/cmake3
FROM cpu_final as cuda_final
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
FROM common as rocm_final
ARG ROCM_VERSION=3.7
# Install ROCm
ADD ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
# cmake3 is needed for the later MIOpen custom build, so that step is last.
RUN yum install -y cmake3 && \
rm -f /usr/bin/cmake && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
FROM cpu_final as xpu_final
# cmake-3.28.4 from pip
RUN python3 -m pip install --upgrade pip && \
python3 -mpip install cmake==3.28.4
ADD ./common/install_xpu.sh install_xpu.sh
RUN bash ./install_xpu.sh && rm install_xpu.sh
RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd

View File

@ -1,57 +0,0 @@
FROM quay.io/pypa/manylinux_2_28_aarch64 as base
# Graviton needs GCC 10 or above for the build. GCC12 is the default version in almalinux-8.
ARG GCCTOOLSET_VERSION=11
# Language variabes
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8
# Installed needed OS packages. This is to support all
# the binary builds (torch, vision, audio, text, data)
RUN yum -y install epel-release
RUN yum -y update
RUN yum install -y \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
less \
libffi-devel \
libgomp \
make \
openssl-devel \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
yasm \
zstd \
sudo \
gcc-toolset-${GCCTOOLSET_VERSION}-toolchain
# Ensure the expected devtoolset is used
ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
FROM base as final
# remove unncessary python versions
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6

View File

@ -1,94 +0,0 @@
FROM quay.io/pypa/manylinux2014_aarch64 as base
# Graviton needs GCC 10 for the build
ARG DEVTOOLSET_VERSION=10
# Language variabes
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8
# Installed needed OS packages. This is to support all
# the binary builds (torch, vision, audio, text, data)
RUN yum -y install epel-release
RUN yum -y update
RUN yum install -y \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
yasm \
less \
zstd \
libgomp \
sudo \
devtoolset-${DEVTOOLSET_VERSION}-gcc \
devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ \
devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
devtoolset-${DEVTOOLSET_VERSION}-binutils
# Ensure the expected devtoolset is used
ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
###############################################################################
# libglfortran.a hack
#
# libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC.
# This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. To solve, get
# ubuntu's libgfortran.a which is compiled with -fPIC
# NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vender, or changed
###############################################################################
RUN cd ~/ \
&& curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb \
&& ar x ~/libgfortran-10-dev.deb \
&& tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \
&& cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/
# install cmake
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
FROM base as openblas
# Install openblas
ADD ./common/install_openblas.sh install_openblas.sh
RUN bash ./install_openblas.sh && rm install_openblas.sh
FROM openssl as final
# remove unncessary python versions
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH

View File

@ -1,91 +0,0 @@
FROM quay.io/pypa/manylinux_2_28_aarch64 as base
# Cuda ARM build needs gcc 11
ARG DEVTOOLSET_VERSION=11
# Language variables
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8
# Installed needed OS packages. This is to support all
# the binary builds (torch, vision, audio, text, data)
RUN yum -y install epel-release
RUN yum -y update
RUN yum install -y \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
yasm \
less \
zstd \
libgomp \
sudo \
gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
# Ensure the expected devtoolset is used
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
FROM openssl as final
# remove unncessary python versions
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
FROM base as cuda
ARG BASE_CUDA_VERSION
# Install CUDA
ADD ./common/install_cuda_aarch64.sh install_cuda_aarch64.sh
RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh
FROM base as magma
ARG BASE_CUDA_VERSION
# Install magma
ADD ./common/install_magma.sh install_magma.sh
RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
FROM base as openblas
# Install openblas
ADD ./common/install_openblas.sh install_openblas.sh
RUN bash ./install_openblas.sh && rm install_openblas.sh
FROM final as cuda_final
ARG BASE_CUDA_VERSION
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH

View File

@ -1,71 +0,0 @@
FROM centos:8 as base
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
ENV PATH /opt/rh/gcc-toolset-11/root/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# change to a valid repo
RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*.repo
# enable to install ninja-build
RUN sed -i 's|enabled=0|enabled=1|g' /etc/yum.repos.d/CentOS-Linux-PowerTools.repo
RUN yum -y update
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which zlib-devel sudo
RUN yum install -y autoconf automake make cmake gdb gcc-toolset-11-gcc-c++
FROM base as openssl
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
# Install python
FROM base as python
RUN yum install -y openssl-devel zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel
ADD common/install_cpython.sh install_cpython.sh
RUN bash ./install_cpython.sh && rm install_cpython.sh
FROM base as conda
ADD ./common/install_conda_docker.sh install_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh
RUN /opt/conda/bin/conda install -y cmake
FROM base as intel
# Install MKL
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
COPY --from=conda /opt/conda /opt/conda
ENV PATH=/opt/conda/bin:$PATH
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM base as patchelf
ADD ./common/install_patchelf.sh install_patchelf.sh
RUN bash ./install_patchelf.sh && rm install_patchelf.sh
RUN cp $(which patchelf) /patchelf
FROM base as jni
ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
FROM base as libpng
ADD ./common/install_libpng.sh install_libpng.sh
RUN bash ./install_libpng.sh && rm install_libpng.sh
FROM base as final
COPY --from=openssl /opt/openssl /opt/openssl
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
COPY --from=intel /opt/intel /opt/intel
COPY --from=conda /opt/conda /opt/conda
COPY --from=patchelf /usr/local/bin/patchelf /usr/local/bin/patchelf
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
COPY --from=libpng /usr/local/include/png* /usr/local/include/
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
RUN yum install -y ninja-build

View File

@ -1,73 +0,0 @@
FROM --platform=linux/s390x docker.io/ubuntu:24.04 as base
# Language variables
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV LANGUAGE=C.UTF-8
# Installed needed OS packages. This is to support all
# the binary builds (torch, vision, audio, text, data)
RUN apt update ; apt upgrade -y
RUN apt install -y \
build-essential \
autoconf \
automake \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz-utils \
less \
zstd \
cmake \
python3 \
python3-dev \
python3-setuptools \
python3-yaml \
python3-typing-extensions \
libblas-dev \
libopenblas-dev \
liblapack-dev \
libatlas-base-dev
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
# EPEL for cmake
FROM base as patchelf
# Install patchelf
ADD ./common/install_patchelf.sh install_patchelf.sh
RUN bash ./install_patchelf.sh && rm install_patchelf.sh
RUN cp $(which patchelf) /patchelf
FROM patchelf as python
# build python
COPY manywheel/build_scripts /build_scripts
ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh
RUN bash build_scripts/build.sh && rm -r build_scripts
FROM openssl as final
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
COPY --from=python /opt/python/cp39-cp39/bin/auditwheel /usr/local/bin/auditwheel
COPY --from=patchelf /usr/local/bin/patchelf /usr/local/bin/patchelf

View File

@ -1,154 +0,0 @@
#!/usr/bin/env bash
# Script used only in CD pipeline
set -eou pipefail
TOPDIR=$(git rev-parse --show-toplevel)
image="$1"
shift
if [ -z "${image}" ]; then
echo "Usage: $0 IMAGE"
exit 1
fi
DOCKER_IMAGE="pytorch/${image}"
DOCKER_REGISTRY="${DOCKER_REGISTRY:-docker.io}"
GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-}
DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-}
WITH_PUSH=${WITH_PUSH:-}
case ${GPU_ARCH_TYPE} in
cpu)
TARGET=cpu_final
DOCKER_TAG=cpu
GPU_IMAGE=centos:7
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
;;
cpu-manylinux_2_28)
TARGET=cpu_final
DOCKER_TAG=cpu
GPU_IMAGE=amd64/almalinux:8
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
MANY_LINUX_VERSION="2_28"
;;
cpu-aarch64)
TARGET=final
DOCKER_TAG=cpu-aarch64
GPU_IMAGE=arm64v8/centos:7
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10"
MANY_LINUX_VERSION="aarch64"
;;
cpu-aarch64-2_28)
TARGET=final
DOCKER_TAG=cpu-aarch64
GPU_IMAGE=arm64v8/almalinux:8
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
MANY_LINUX_VERSION="2_28_aarch64"
;;
cpu-cxx11-abi)
TARGET=final
DOCKER_TAG=cpu-cxx11-abi
GPU_IMAGE=""
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
MANY_LINUX_VERSION="cxx11-abi"
;;
cpu-s390x)
TARGET=final
DOCKER_TAG=cpu-s390x
GPU_IMAGE=redhat/ubi9
DOCKER_GPU_BUILD_ARG=""
MANY_LINUX_VERSION="s390x"
;;
cuda)
TARGET=cuda_final
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
# Keep this up to date with the minimum version of CUDA we currently support
GPU_IMAGE=centos:7
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=9"
;;
cuda-manylinux_2_28)
TARGET=cuda_final
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
GPU_IMAGE=amd64/almalinux:8
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
MANY_LINUX_VERSION="2_28"
;;
cuda-aarch64)
TARGET=cuda_final
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
GPU_IMAGE=arm64v8/centos:7
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
MANY_LINUX_VERSION="aarch64"
DOCKERFILE_SUFFIX="_cuda_aarch64"
;;
rocm)
TARGET=rocm_final
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
else
echo "ERROR: rocm regex failed"
exit 1
fi
if [[ $ROCM_VERSION_INT -ge 60000 ]]; then
PYTORCH_ROCM_ARCH+=";gfx942"
fi
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
;;
xpu)
TARGET=xpu_final
DOCKER_TAG=xpu
GPU_IMAGE=amd64/almalinux:8
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
MANY_LINUX_VERSION="2_28"
;;
*)
echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
exit 1
;;
esac
IMAGES=''
if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
DOCKERFILE_SUFFIX=_${MANY_LINUX_VERSION}
fi
(
set -x
DOCKER_BUILDKIT=1 docker build \
${DOCKER_GPU_BUILD_ARG} \
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
--target "${TARGET}" \
-t "${DOCKER_IMAGE}" \
$@ \
-f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
"${TOPDIR}/.ci/docker/"
)
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}
if [[ "${WITH_PUSH}" == true ]]; then
(
set -x
docker push "${DOCKER_IMAGE}"
if [[ -n ${GITHUB_REF} ]]; then
docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
docker push "${DOCKER_IMAGE_BRANCH_TAG}"
docker push "${DOCKER_IMAGE_SHA_TAG}"
fi
)
fi

View File

@ -1,131 +0,0 @@
#!/bin/bash
# Top-level build script called from Dockerfile
# Script used only in CD pipeline
# Stop at any error, show all commands
set -ex
# openssl version to build, with expected sha256 hash of .tar.gz
# archive
OPENSSL_ROOT=openssl-1.1.1l
OPENSSL_HASH=0b7a3e5e59c34827fe0c3a74b7ec8baef302b98fa80088d7f9153aa16fa76bd1
DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
CURL_ROOT=curl-7.73.0
CURL_HASH=cf34fe0b07b800f1c01a499a6e8b2af548f6d0e044dca4a29d88a4bee146d131
AUTOCONF_ROOT=autoconf-2.69
AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
# Get build utilities
MY_DIR=$(dirname "${BASH_SOURCE[0]}")
source $MY_DIR/build_utils.sh
if [ "$(uname -m)" != "s390x" ] ; then
# Dependencies for compiling Python that we want to remove from
# the final image after compiling Python
PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel libffi-devel"
# Libraries that are allowed as part of the manylinux1 profile
MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"
# Development tools and libraries
yum -y install bzip2 make git patch unzip bison yasm diffutils \
automake which file cmake28 \
kernel-devel-`uname -r` \
${PYTHON_COMPILE_DEPS}
else
# Dependencies for compiling Python that we want to remove from
# the final image after compiling Python
PYTHON_COMPILE_DEPS="zlib1g-dev libbz2-dev libncurses-dev libsqlite3-dev libdb-dev libpcap-dev liblzma-dev libffi-dev"
# Libraries that are allowed as part of the manylinux1 profile
MANYLINUX1_DEPS="libglib2.0-dev libX11-dev libncurses-dev"
# Development tools and libraries
apt install -y bzip2 make git patch unzip diffutils \
automake which file cmake \
linux-headers-virtual \
${PYTHON_COMPILE_DEPS}
fi
# Install newest autoconf
build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
autoconf --version
# Compile the latest Python releases.
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
build_openssl $OPENSSL_ROOT $OPENSSL_HASH
/build_scripts/install_cpython.sh
PY39_BIN=/opt/python/cp39-cp39/bin
# Our openssl doesn't know how to find the system CA trust store
# (https://github.com/pypa/manylinux/issues/53)
# And it's not clear how up-to-date that is anyway
# So let's just use the same one pip and everyone uses
$PY39_BIN/pip install certifi
ln -s $($PY39_BIN/python -c 'import certifi; print(certifi.where())') \
/opt/_internal/certs.pem
# If you modify this line you also have to modify the versions in the
# Dockerfiles:
export SSL_CERT_FILE=/opt/_internal/certs.pem
# Install newest curl
build_curl $CURL_ROOT $CURL_HASH
rm -rf /usr/local/include/curl /usr/local/lib/libcurl* /usr/local/lib/pkgconfig/libcurl.pc
hash -r
curl --version
curl-config --features
# Install patchelf (latest with unreleased bug fixes)
curl -sLOk https://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.gz
# check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH
tar -xzf patchelf-0.10.tar.gz
(cd patchelf-0.10 && ./configure && make && make install)
rm -rf patchelf-0.10.tar.gz patchelf-0.10
# Install latest pypi release of auditwheel
$PY39_BIN/pip install auditwheel
ln -s $PY39_BIN/auditwheel /usr/local/bin/auditwheel
# Clean up development headers and other unnecessary stuff for
# final image
if [ "$(uname -m)" != "s390x" ] ; then
yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
avahi freetype bitstream-vera-fonts \
${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
yum -y install ${MANYLINUX1_DEPS}
yum -y clean all > /dev/null 2>&1
yum list installed
else
apt purge -y ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
fi
# we don't need libpython*.a, and they're many megabytes
find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f
# Strip what we can -- and ignore errors, because this just attempts to strip
# *everything*, including non-ELF files:
find /opt/_internal -type f -print0 \
| xargs -0 -n1 strip --strip-unneeded 2>/dev/null || true
# We do not need the Python test suites, or indeed the precompiled .pyc and
# .pyo files. Partially cribbed from:
# https://github.com/docker-library/python/blob/master/3.4/slim/Dockerfile
find /opt/_internal \
\( -type d -a -name test -o -name tests \) \
-o \( -type f -a -name '*.pyc' -o -name '*.pyo' \) \
-print0 | xargs -0 rm -f
for PYTHON in /opt/python/*/bin/python; do
# Smoke test to make sure that our Pythons work, and do indeed detect as
# being manylinux compatible:
$PYTHON $MY_DIR/manylinux1-check.py
# Make sure that SSL cert checking works
$PYTHON $MY_DIR/ssl-check.py
done
# Fix libc headers to remain compatible with C99 compilers.
find /usr/include/ -type f -exec sed -i 's/\bextern _*inline_*\b/extern __inline __attribute__ ((__gnu_inline__))/g' {} +
# Now we can delete our built SSL
rm -rf /usr/local/ssl

View File

@ -1,91 +0,0 @@
#!/bin/bash
# Helper utilities for build
# Script used only in CD pipeline
OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/
CURL_DOWNLOAD_URL=https://curl.askapache.com/download
AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf
function check_var {
if [ -z "$1" ]; then
echo "required variable not defined"
exit 1
fi
}
function do_openssl_build {
./config no-ssl2 no-shared -fPIC --prefix=/usr/local/ssl > /dev/null
make > /dev/null
make install > /dev/null
}
function check_sha256sum {
local fname=$1
check_var ${fname}
local sha256=$2
check_var ${sha256}
echo "${sha256} ${fname}" > ${fname}.sha256
sha256sum -c ${fname}.sha256
rm -f ${fname}.sha256
}
function build_openssl {
local openssl_fname=$1
check_var ${openssl_fname}
local openssl_sha256=$2
check_var ${openssl_sha256}
check_var ${OPENSSL_DOWNLOAD_URL}
curl -sLO ${OPENSSL_DOWNLOAD_URL}/${openssl_fname}.tar.gz
check_sha256sum ${openssl_fname}.tar.gz ${openssl_sha256}
tar -xzf ${openssl_fname}.tar.gz
(cd ${openssl_fname} && do_openssl_build)
rm -rf ${openssl_fname} ${openssl_fname}.tar.gz
}
function do_curl_build {
LIBS=-ldl ./configure --with-ssl --disable-shared > /dev/null
make > /dev/null
make install > /dev/null
}
function build_curl {
local curl_fname=$1
check_var ${curl_fname}
local curl_sha256=$2
check_var ${curl_sha256}
check_var ${CURL_DOWNLOAD_URL}
curl -sLO ${CURL_DOWNLOAD_URL}/${curl_fname}.tar.bz2
check_sha256sum ${curl_fname}.tar.bz2 ${curl_sha256}
tar -jxf ${curl_fname}.tar.bz2
(cd ${curl_fname} && do_curl_build)
rm -rf ${curl_fname} ${curl_fname}.tar.bz2
}
function do_standard_install {
./configure > /dev/null
make > /dev/null
make install > /dev/null
}
function build_autoconf {
local autoconf_fname=$1
check_var ${autoconf_fname}
local autoconf_sha256=$2
check_var ${autoconf_sha256}
check_var ${AUTOCONF_DOWNLOAD_URL}
curl -sLO ${AUTOCONF_DOWNLOAD_URL}/${autoconf_fname}.tar.gz
check_sha256sum ${autoconf_fname}.tar.gz ${autoconf_sha256}
tar -zxf ${autoconf_fname}.tar.gz
(cd ${autoconf_fname} && do_standard_install)
rm -rf ${autoconf_fname} ${autoconf_fname}.tar.gz
}

View File

@ -1,60 +0,0 @@
# Logic copied from PEP 513
def is_manylinux1_compatible():
# Only Linux, and only x86-64 / i686
from distutils.util import get_platform
if get_platform() not in ["linux-x86_64", "linux-i686", "linux-s390x"]:
return False
# Check for presence of _manylinux module
try:
import _manylinux
return bool(_manylinux.manylinux1_compatible)
except (ImportError, AttributeError):
# Fall through to heuristic check below
pass
# Check glibc version. CentOS 5 uses glibc 2.5.
return have_compatible_glibc(2, 5)
def have_compatible_glibc(major, minimum_minor):
import ctypes
process_namespace = ctypes.CDLL(None)
try:
gnu_get_libc_version = process_namespace.gnu_get_libc_version
except AttributeError:
# Symbol doesn't exist -> therefore, we are not linked to
# glibc.
return False
# Call gnu_get_libc_version, which returns a string like "2.5".
gnu_get_libc_version.restype = ctypes.c_char_p
version_str = gnu_get_libc_version()
# py2 / py3 compatibility:
if not isinstance(version_str, str):
version_str = version_str.decode("ascii")
# Parse string and check against requested version.
version = [int(piece) for piece in version_str.split(".")]
assert len(version) == 2
if major != version[0]:
return False
if minimum_minor > version[1]:
return False
return True
import sys
if is_manylinux1_compatible():
print(f"{sys.executable} is manylinux1 compatible")
sys.exit(0)
else:
print(f"{sys.executable} is NOT manylinux1 compatible")
sys.exit(1)

View File

@ -1,35 +0,0 @@
# cf. https://github.com/pypa/manylinux/issues/53
GOOD_SSL = "https://google.com"
BAD_SSL = "https://self-signed.badssl.com"
import sys
print("Testing SSL certificate checking for Python:", sys.version)
if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
print("This version never checks SSL certs; skipping tests")
sys.exit(0)
if sys.version_info[0] >= 3:
from urllib.request import urlopen
EXC = OSError
else:
from urllib import urlopen
EXC = IOError
print(f"Connecting to {GOOD_SSL} should work")
urlopen(GOOD_SSL)
print("...it did, yay.")
print(f"Connecting to {BAD_SSL} should fail")
try:
urlopen(BAD_SSL)
# If we get here then we failed:
print("...it DIDN'T!!!!!11!!1one!")
sys.exit(1)
except EXC:
print("...it did, yay.")

View File

@ -134,9 +134,9 @@ opt-einsum==3.3
#Pinned versions: 3.3
#test that import: test_linalg.py
optree==0.12.1
optree==0.11.0
#Description: A library for tree manipulation
#Pinned versions: 0.12.1
#Pinned versions: 0.11.0
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,

View File

@ -1 +1,42 @@
This directory contains scripts for our continuous integration.
One important thing to keep in mind when reading the scripts here is
that they are all based off of Docker images, which we build for each of
the various system configurations we want to run on Jenkins. This means
it is very easy to run these tests yourself:
1. Figure out what Docker image you want. The general template for our
images look like:
``registry.pytorch.org/pytorch/pytorch-$BUILD_ENVIRONMENT:$DOCKER_VERSION``,
where ``$BUILD_ENVIRONMENT`` is one of the build environments
enumerated in
[pytorch-dockerfiles](https://github.com/pytorch/pytorch/blob/master/.ci/docker/build.sh). The dockerfile used by jenkins can be found under the `.ci` [directory](https://github.com/pytorch/pytorch/blob/master/.ci/docker)
2. Run ``docker run -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and
run one of the scripts in this directory.
The Docker images are designed so that any "reasonable" build commands
will work; if you look in [build.sh](build.sh) you will see that it is a
very simple script. This is intentional. Idiomatic build instructions
should work inside all of our Docker images. You can tweak the commands
however you need (e.g., in case you want to rebuild with DEBUG, or rerun
the build with higher verbosity, etc.).
We have to do some work to make this so. Here is a summary of the
mechanisms we use:
- We install binaries to directories like `/usr/local/bin` which
are automatically part of your PATH.
- We add entries to the PATH using Docker ENV variables (so
they apply when you enter Docker) and `/etc/environment` (so they
continue to apply even if you sudo), instead of modifying
`PATH` in our build scripts.
- We use `/etc/ld.so.conf.d` to register directories containing
shared libraries, instead of modifying `LD_LIBRARY_PATH` in our
build scripts.
- We reroute well known paths like `/usr/bin/gcc` to alternate
implementations with `update-alternatives`, instead of setting
`CC` and `CXX` in our implementations.

View File

@ -230,10 +230,6 @@ if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi
if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
export CMAKE_BUILD_TYPE=RelWithAssert
fi
# Do not change workspace permissions for ROCm CI jobs
# as it can leave workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then

View File

@ -6,7 +6,6 @@ from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.x509.oid import NameOID
temp_dir = mkdtemp()
print(temp_dir)

View File

@ -18,7 +18,6 @@ time python test/run_test.py --verbose -i distributed/test_c10d_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
time python test/run_test.py --verbose -i distributed/test_store
time python test/run_test.py --verbose -i distributed/test_symmetric_memory
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
@ -44,15 +43,16 @@ time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compi
time python test/run_test.py --verbose -i distributed/test_device_mesh
# DTensor/TP tests
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_ddp_2d_parallel
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_fsdp_2d_parallel
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state
# FSDP2 tests
time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh
# ND composability tests
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_2d_composability
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_pp_composability
# Pipelining composability tests
time python test/run_test.py --verbose -i distributed/pipelining/test_composability.py
# Other tests
time python test/run_test.py --verbose -i test_cuda_primary_ctx

View File

@ -3,7 +3,6 @@ import json
import math
import sys
parser = argparse.ArgumentParser()
parser.add_argument(
"--test-name", dest="test_name", action="store", required=True, help="test name"

View File

@ -3,7 +3,6 @@ import sys
import numpy
sample_data_list = sys.argv[1:]
sample_data_list = [float(v.strip()) for v in sample_data_list]

View File

@ -1,7 +1,6 @@
import json
import sys
data_file_path = sys.argv[1]
commit_hash = sys.argv[2]

View File

@ -1,6 +1,5 @@
import sys
log_file_path = sys.argv[1]
with open(log_file_path) as f:

View File

@ -249,7 +249,9 @@ fi
# This tests that the debug asserts are working correctly.
if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
echo "We are in debug mode: $BUILD_ENVIRONMENT. Expect the python assertion to fail"
(cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_debug_asserts_fail(424242)")
# TODO: Enable the check after we setup the build to run debug asserts without having
# to do a full (and slow) debug build
# (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_debug_asserts_fail(424242)")
elif [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
# Noop when debug is disabled. Skip bazel jobs because torch isn't available there yet.
echo "We are not in debug mode: $BUILD_ENVIRONMENT. Expect the assertion to pass"
@ -316,13 +318,14 @@ test_inductor_distributed() {
python test/run_test.py -i inductor/test_aot_inductor.py -k test_replicate_on_devices --verbose
python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
python test/run_test.py -i distributed/tensor/parallel/test_fsdp_2d_parallel.py --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_mlp --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_hsdp --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_transformer_checkpoint_resume --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_gradient_accumulation --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_state_dict.py -k test_dp_state_dict_save_load --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_frozen.py --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
@ -403,7 +406,7 @@ if [[ "${TEST_CONFIG}" == *dynamic* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--dynamic-shapes --dynamic-batch-only)
fi
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_inductor* || "${TEST_CONFIG}" == *cpu_aot_inductor* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
else
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
@ -427,19 +430,6 @@ test_perf_for_dashboard() {
# TODO: All the accuracy tests can be skipped once the CI accuracy checking is stable enough
local targets=(accuracy performance)
local device=cuda
local taskset=""
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_x86* ]]; then
device=cpu_x86
elif [[ "${TEST_CONFIG}" == *cpu_aarch64* ]]; then
device=cpu_aarch64
fi
test_inductor_set_cpu_affinity
end_core=$(( $(test_inductor_get_core_number)-1 ))
taskset="taskset -c 0-$end_core"
fi
for mode in "${modes[@]}"; do
if [[ "$mode" == "inference" ]]; then
dtype=bfloat16
@ -455,56 +445,56 @@ test_perf_for_dashboard() {
fi
if [[ "$DASHBOARD_TAG" == *default-true* ]]; then
$taskset python "benchmarks/dynamo/$suite.py" \
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *cudagraphs-true* ]]; then
$taskset python "benchmarks/dynamo/$suite.py" \
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" \
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *dynamic-true* ]]; then
$taskset python "benchmarks/dynamo/$suite.py" \
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --dynamic-shapes \
--dynamic-batch-only "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
TORCHINDUCTOR_CPP_WRAPPER=1 $taskset python "benchmarks/dynamo/$suite.py" \
TORCHINDUCTOR_CPP_WRAPPER=1 python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *freezing_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
$taskset python "benchmarks/dynamo/$suite.py" \
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *freeze_autotune_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
TORCHINDUCTOR_MAX_AUTOTUNE=1 $taskset python "benchmarks/dynamo/$suite.py" \
TORCHINDUCTOR_MAX_AUTOTUNE=1 python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" --freezing \
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_autotune_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_freezing_autotune_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *aotinductor-true* ]] && [[ "$mode" == "inference" ]]; then
TORCHINDUCTOR_ABI_COMPATIBLE=1 $taskset python "benchmarks/dynamo/$suite.py" \
TORCHINDUCTOR_ABI_COMPATIBLE=1 python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *maxautotune-true* ]]; then
TORCHINDUCTOR_MAX_AUTOTUNE=1 $taskset python "benchmarks/dynamo/$suite.py" \
TORCHINDUCTOR_MAX_AUTOTUNE=1 python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" \
--output "$TEST_REPORTS_DIR/${backend}_max_autotune_${suite}_${dtype}_${mode}_${device}_${target}.csv"
--output "$TEST_REPORTS_DIR/${backend}_max_autotune_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *cudagraphs_low_precision-true* ]] && [[ "$mode" == "inference" ]]; then
# TODO: This has a new dtype called quant and the benchmarks script needs to be updated to support this.
# The tentative command is as follows. It doesn't work now, but it's ok because we only need mock data
# to fill the dashboard.
$taskset python "benchmarks/dynamo/$suite.py" \
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --quant --backend "$backend" "$@" \
--output "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_${device}_${target}.csv" || true
--output "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_cuda_${target}.csv" || true
# Copy cudagraph results as mock data, easiest choice?
cp "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_${device}_${target}.csv" \
"$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_${device}_${target}.csv"
cp "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_cuda_${target}.csv" \
"$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_cuda_${target}.csv"
fi
done
done
@ -585,7 +575,7 @@ test_dynamo_benchmark() {
elif [[ "${TEST_CONFIG}" == *perf* ]]; then
test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
else
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_inductor* || "${TEST_CONFIG}" == *cpu_aot_inductor* ]]; then
local dt="float32"
if [[ "${TEST_CONFIG}" == *amp* ]]; then
dt="amp"
@ -656,11 +646,10 @@ test_inductor_torchbench_smoketest_perf() {
done
}
test_inductor_get_core_number() {
echo $(($(lscpu | grep 'Socket(s):' | awk '{print $2}') * $(lscpu | grep 'Core(s) per socket:' | awk '{print $4}')))
}
test_inductor_torchbench_cpu_smoketest_perf(){
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
test_inductor_set_cpu_affinity(){
#set jemalloc
JEMALLOC_LIB="/usr/lib/x86_64-linux-gnu/libjemalloc.so.2"
IOMP_LIB="$(dirname "$(which python)")/../lib/libiomp5.so"
@ -668,39 +657,32 @@ test_inductor_set_cpu_affinity(){
export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1"
export KMP_AFFINITY=granularity=fine,compact,1,0
export KMP_BLOCKTIME=1
cores=$(test_inductor_get_core_number)
export OMP_NUM_THREADS=$cores
}
CORES=$(lscpu | grep Core | awk '{print $4}')
export OMP_NUM_THREADS=$CORES
end_core=$(( CORES-1 ))
test_inductor_torchbench_cpu_smoketest_perf(){
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
test_inductor_set_cpu_affinity
end_core=$(( $(test_inductor_get_core_number)-1 ))
MODELS_SPEEDUP_TARGET=benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv
grep -v '^ *#' < "$MODELS_SPEEDUP_TARGET" | while IFS=',' read -r -a model_cfg
do
local model_name=${model_cfg[0]}
local data_type=${model_cfg[2]}
local speedup_target=${model_cfg[5]}
local backend=${model_cfg[1]}
if [[ ${model_cfg[4]} == "cpp" ]]; then
local data_type=${model_cfg[1]}
local speedup_target=${model_cfg[4]}
if [[ ${model_cfg[3]} == "cpp" ]]; then
export TORCHINDUCTOR_CPP_WRAPPER=1
else
unset TORCHINDUCTOR_CPP_WRAPPER
fi
local output_name="$TEST_REPORTS_DIR/inductor_inference_${model_cfg[0]}_${model_cfg[1]}_${model_cfg[2]}_${model_cfg[3]}_cpu_smoketest.csv"
if [[ ${model_cfg[3]} == "dynamic" ]]; then
if [[ ${model_cfg[2]} == "dynamic" ]]; then
taskset -c 0-"$end_core" python benchmarks/dynamo/torchbench.py \
--inference --performance --"$data_type" -dcpu -n50 --only "$model_name" --dynamic-shapes \
--dynamic-batch-only --freezing --timeout 9000 --"$backend" --output "$output_name"
--dynamic-batch-only --freezing --timeout 9000 --backend=inductor --output "$output_name"
else
taskset -c 0-"$end_core" python benchmarks/dynamo/torchbench.py \
--inference --performance --"$data_type" -dcpu -n50 --only "$model_name" \
--freezing --timeout 9000 --"$backend" --output "$output_name"
--freezing --timeout 9000 --backend=inductor --output "$output_name"
fi
cat "$output_name"
# The threshold value needs to be actively maintained to make this check useful.
@ -1286,7 +1268,7 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
id=$((SHARD_NUMBER-1))
test_dynamo_benchmark timm_models "$id"
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
if [[ "${TEST_CONFIG}" == *cpu_inductor* || "${TEST_CONFIG}" == *cpu_aot_inductor* ]]; then
install_torchaudio cpu
else
install_torchaudio cuda
@ -1303,7 +1285,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_gcn \
llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
shufflenet_v2_x1_0 hf_GPT2 yolov3 mobilenet_v2 resnext50_32x4d hf_T5_base
shufflenet_v2_x1_0 hf_GPT2
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
checkout_install_torchbench
@ -1312,7 +1294,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
checkout_install_torchbench
# Do this after checkout_install_torchbench to ensure we clobber any
# nightlies that torchbench may pull in
if [[ "${TEST_CONFIG}" != *cpu* ]]; then
if [[ "${TEST_CONFIG}" != *cpu_inductor* && "${TEST_CONFIG}" != *cpu_aot_inductor* ]]; then
install_torchrec_and_fbgemm
fi
PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
@ -1320,19 +1302,21 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then
install_torchvision
test_inductor_cpp_wrapper_abi_compatible
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
test_inductor_shard 1
test_inductor_aoti
test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" -gt 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
test_inductor_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_inductor_aoti
test_inductor_distributed
fi
elif [[ "${TEST_CONFIG}" == *dynamo* ]]; then
elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
test_dynamo_shard 1
test_aten
elif [[ "${TEST_CONFIG}" == *dynamo* && $SHARD_NUMBER -gt 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
test_dynamo_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_aten
fi
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
install_torchvision
test_python_shard "$SHARD_NUMBER"

View File

@ -4,7 +4,6 @@ import os
import subprocess
import sys
COMMON_TESTS = [
(
"Checking that torch is available",

View File

@ -5,7 +5,6 @@ import sys
import yaml
# Need to import modules that lie on an upward-relative path
sys.path.append(os.path.join(sys.path[0], ".."))

View File

@ -46,12 +46,14 @@ if [[ "\$python_nodot" = *310* ]]; then
PROTOBUF_PACKAGE="protobuf>=3.19.0"
fi
if [[ "\$python_nodot" = *39* ]]; then
if [[ "\$python_nodot" = *39* ]]; then
# There's an issue with conda channel priority where it'll randomly pick 1.19 over 1.20
# we set a lower boundary here just to be safe
NUMPY_PIN=">=1.20"
fi
# Move debug wheels out of the package dir so they don't get installed
mkdir -p /tmp/debug_final_pkgs
mv /final_pkgs/debug-*.zip /tmp/debug_final_pkgs || echo "no debug packages to move"
@ -81,7 +83,7 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
"numpy\${NUMPY_PIN}" \
mkl>=2018 \
ninja \
sympy>=1.12 \
sympy \
typing-extensions \
${PROTOBUF_PACKAGE}
if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
@ -116,18 +118,9 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then
cd /tmp/libtorch
fi
if [[ "$GPU_ARCH_TYPE" == xpu ]]; then
# Workaround for __mkl_tmp_MOD unbound variable issue, refer https://github.com/pytorch/pytorch/issues/130543
set +u
source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
fi
# Test the package
/builder/check_binary.sh
# Clean temp files
cd /builder && git clean -ffdx
# =================== The above code will be executed inside Docker container ===================
EOL
echo

View File

@ -100,20 +100,6 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B
fi
fi
# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton xpu package
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* && $(uname) == "Linux" ]]; then
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt)
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+${TRITON_SHORTHASH}"
fi
if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}"
else
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}"
fi
fi
cat >"$envfile" <<EOL
# =================== The following code will be executed inside Docker container ===================
export TZ=UTC

View File

@ -29,11 +29,6 @@ if [[ "${USE_SPLIT_BUILD:-false}" == "true" ]]; then
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_pkg"
fi
# this is special build with all dependencies packaged
if [[ ${BUILD_NAME} == *-full* ]]; then
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_full"
fi
# Sleep 2 minutes between retries for conda upload
retry () {
"$@" || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@")

View File

@ -8,7 +8,6 @@ import time
import requests
AZURE_PIPELINE_BASE_URL = "https://aiinfra.visualstudio.com/PyTorch/"
AZURE_DEVOPS_PAT_BASE64 = os.environ.get("AZURE_DEVOPS_PAT_BASE64_SECRET", "")
PIPELINE_ID = "911"

View File

@ -5,7 +5,7 @@ git submodule sync
git submodule update --init --recursive
# This takes some time
make setup-lint
make setup_lint
# Add CMAKE_PREFIX_PATH to bashrc
echo 'export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}' >> ~/.bashrc

View File

@ -2,7 +2,7 @@
# NOTE: **Mirror any changes** to this file the [tool.ruff] config in pyproject.toml
# before we can fully move to use ruff
enable-extensions = G
select = B,C,E,F,G,P,SIM1,SIM911,T4,W,B9,TOR0,TOR1,TOR2,TOR9
select = B,C,E,F,G,P,SIM1,T4,W,B9,TOR0,TOR1,TOR2,TOR9
max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead

View File

@ -40,7 +40,3 @@ e6ec0efaf87703c5f889cfc20b29be455885d58d
a53cda1ddc15336dc1ff0ce1eff2a49cdc5f882e
# 2024-01-02 clangformat: fused adam #116583
9dc68d1aa9e554d09344a10fff69f7b50b2d23a0
# 2024-06-28 enable UFMT in `torch/storage.py`
d80939e5e9337e8078f11489afefec59fd42f93b
# 2024-06-28 enable UFMT in `torch.utils.data`
7cf0b90e49689d45be91aa539fdf54cf2ea8a9a3

View File

@ -9,16 +9,14 @@ self-hosted-runner:
- linux.large
- linux.2xlarge
- linux.4xlarge
- linux.9xlarge.ephemeral
- linux.12xlarge
- linux.12xlarge.ephemeral
- linux.24xlarge
- linux.arm64.2xlarge
- linux.4xlarge.nvidia.gpu
- linux.8xlarge.nvidia.gpu
- linux.16xlarge.nvidia.gpu
- linux.g5.4xlarge.nvidia.gpu
# Pytorch/pytorch AWS Linux Runners on Linux Foundation account
# Organization-wide AWS Linux Runners on Linux Foundation account
- lf.linux.large
- lf.linux.2xlarge
- lf.linux.4xlarge
@ -29,28 +27,6 @@ self-hosted-runner:
- lf.linux.8xlarge.nvidia.gpu
- lf.linux.16xlarge.nvidia.gpu
- lf.linux.g5.4xlarge.nvidia.gpu
# Organization-wide AWS Linux Runners with new Amazon 2023 AMI
- amz2023.linux.large
- amz2023.linux.2xlarge
- amz2023.linux.4xlarge
- amz2023.linux.12xlarge
- amz2023.linux.24xlarge
- amz2023.linux.arm64.2xlarge
- amz2023.linux.4xlarge.nvidia.gpu
- amz2023.linux.8xlarge.nvidia.gpu
- amz2023.linux.16xlarge.nvidia.gpu
- amz2023.linux.g5.4xlarge.nvidia.gpu
# Pytorch/pytorch AWS Linux Runners with the new Amazon 2023 AMI on Linux Foundation account
- amz2023.lf.linux.large
- amz2023.lf.linux.2xlarge
- amz2023.lf.linux.4xlarge
- amz2023.lf.linux.12xlarge
- amz2023.lf.linux.24xlarge
- amz2023.lf.linux.arm64.2xlarge
- amz2023.lf.linux.4xlarge.nvidia.gpu
- amz2023.lf.linux.8xlarge.nvidia.gpu
- amz2023.lf.linux.16xlarge.nvidia.gpu
- amz2023.lf.linux.g5.4xlarge.nvidia.gpu
# Repo-specific IBM hosted S390x runner
- linux.s390x
# Organization wide AWS Windows runners
@ -71,5 +47,3 @@ self-hosted-runner:
- macos-latest-xlarge
- macos-13-xlarge
- macos-14-xlarge
# Organization-wide Intel hosted XPU runners
- linux.idc.xpu

View File

@ -36,8 +36,7 @@ runs:
"${DOCKER_IMAGE}"
)
echo "CONTAINER_NAME=${container_name}" >> "$GITHUB_ENV"
if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" && "${GPU_ARCH_TYPE}" != "xpu" ]]; then
if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" ]]; then
# Propagate download.pytorch.org IP to container. This is only needed on Linux non aarch64 runner
grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" bash -c "/bin/cat >> /etc/hosts"
fi
@ -48,9 +47,10 @@ runs:
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
- name: Cleanup docker
if: always() && (env.BUILD_ENVIRONMENT == 'linux-s390x-binary-manywheel' || env.GPU_ARCH_TYPE == 'xpu')
if: always() && env.BUILD_ENVIRONMENT == 'linux-s390x-binary-manywheel'
shell: bash
run: |
# on s390x or xpu stop the container for clean worker stop
# on s390x stop the container for clean worker stop
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop "${{ env.CONTAINER_NAME }}" || true
docker stop $(docker ps -q) || true

View File

@ -1 +1 @@
69b2a0adc2ec03ab99990d7e8be3d4510438c148
b829e936f7cc61b48149f5f957a451a38bf2a178

View File

@ -1 +1 @@
5ea4535f0699f366adb554183a65ebf7dc34a8be
6f0b61e5d782913a0fc7743812f2a8e522189111

View File

@ -152,130 +152,3 @@ runner_types:
is_ephemeral: false
max_available: 250
os: windows
### Setup runner types to test the Amazon Linux 2023 AMI
lf.c.amz2023.linux.12xlarge:
disk_size: 200
instance_type: c5.12xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.24xl.spr-metal:
disk_size: 200
instance_type: c7i.metal-24xl
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.16xlarge.spr:
disk_size: 200
instance_type: c7i.16xlarge
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.12xlarge.ephemeral:
disk_size: 200
instance_type: c5.12xlarge
is_ephemeral: true
max_available: 300
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.16xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.16xlarge
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.24xlarge:
disk_size: 150
instance_type: c5.24xlarge
is_ephemeral: false
max_available: 250
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.2xlarge:
disk_size: 150
instance_type: c5.2xlarge
is_ephemeral: false
max_available: 3120
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.4xlarge:
disk_size: 150
instance_type: c5.4xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.4xlarge
is_ephemeral: false
max_available: 520
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.8xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.8xlarge
is_ephemeral: false
max_available: 400
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.g4dn.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g4dn.12xlarge
is_ephemeral: false
max_available: 50
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.g4dn.metal.nvidia.gpu:
disk_size: 150
instance_type: g4dn.metal
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.g5.48xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.48xlarge
is_ephemeral: false
max_available: 20
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.g5.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.12xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.g5.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.4xlarge
is_ephemeral: false
max_available: 1200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.large:
disk_size: 15
instance_type: c5.large
is_ephemeral: false
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.arm64.2xlarge:
disk_size: 256
instance_type: t4g.2xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.c.amz2023.linux.arm64.m7g.2xlarge:
disk_size: 256
instance_type: m7g.2xlarge
is_ephemeral: false
max_available: 20
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64

View File

@ -152,130 +152,3 @@ runner_types:
is_ephemeral: false
max_available: 250
os: windows
### Setup runner types to test the Amazon Linux 2023 AMI
lf.amz2023.linux.12xlarge:
disk_size: 200
instance_type: c5.12xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.24xl.spr-metal:
disk_size: 200
instance_type: c7i.metal-24xl
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.16xlarge.spr:
disk_size: 200
instance_type: c7i.16xlarge
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.12xlarge.ephemeral:
disk_size: 200
instance_type: c5.12xlarge
is_ephemeral: true
max_available: 300
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.16xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.16xlarge
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.24xlarge:
disk_size: 150
instance_type: c5.24xlarge
is_ephemeral: false
max_available: 250
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.2xlarge:
disk_size: 150
instance_type: c5.2xlarge
is_ephemeral: false
max_available: 3120
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.4xlarge:
disk_size: 150
instance_type: c5.4xlarge
is_ephemeral: false
max_available: 1000
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.4xlarge
is_ephemeral: false
max_available: 520
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.8xlarge.nvidia.gpu:
disk_size: 150
instance_type: g3.8xlarge
is_ephemeral: false
max_available: 400
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.g4dn.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g4dn.12xlarge
is_ephemeral: false
max_available: 50
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.g4dn.metal.nvidia.gpu:
disk_size: 150
instance_type: g4dn.metal
is_ephemeral: false
max_available: 30
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.g5.48xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.48xlarge
is_ephemeral: false
max_available: 20
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.g5.12xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.12xlarge
is_ephemeral: false
max_available: 150
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.g5.4xlarge.nvidia.gpu:
disk_size: 150
instance_type: g5.4xlarge
is_ephemeral: false
max_available: 1200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.large:
disk_size: 15
instance_type: c5.large
is_ephemeral: false
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.arm64.2xlarge:
disk_size: 256
instance_type: t4g.2xlarge
is_ephemeral: false
max_available: 200
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
lf.amz2023.linux.arm64.m7g.2xlarge:
disk_size: 256
instance_type: m7g.2xlarge
is_ephemeral: false
max_available: 20
os: linux
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64

View File

@ -286,7 +286,6 @@
- test/cpp/dist_autograd/**
- test/cpp/rpc/**
approved_by:
- wconstab
- mrshenli
- pritamdamania87
- zhaojuanmao
@ -313,25 +312,6 @@
- Lint
- pull
- name: DCP
patterns:
- torch/distributed/checkpoint/**
approved_by:
- LucasLLC
- fegin
- wz337
- saumishr
- daulet-askarov
- pradeepdfb
- kirtiteja
- mhorowitz
- saiteja64
mandatory_checks_name:
- EasyCLA
- Lint
- pull
- name: IDEEP
patterns:
- third_party/ideep
@ -407,7 +387,7 @@
- torch/_inductor/codegen/cpp_template.py
- torch/_inductor/codegen/cpp_gemm_template.py
- test/inductor/test_mkldnn_pattern_matcher.py
- test/inductor/test_cpu_repro.py
- test/inductor/test_cpu_repo.py
- test/inductor/test_cpu_cpp_wrapper.py
- test/inductor/test_cpu_select_algorithm.py
- aten/src/ATen/cpu/**

View File

@ -6,7 +6,6 @@ ciflow_push_tags:
- ciflow/binaries_libtorch
- ciflow/binaries_wheel
- ciflow/inductor
- ciflow/inductor-rocm
- ciflow/inductor-perf-compare
- ciflow/inductor-micro-benchmark
- ciflow/inductor-cu124

View File

@ -1,4 +1,4 @@
# iOS simulator requirements
coremltools==5.0b5
protobuf==3.20.2
optree==0.12.1
optree==0.11.0

View File

@ -17,16 +17,16 @@ pytest-xdist==3.3.1
pytest-rerunfailures==10.3
pytest-flakefinder==1.1.0
scipy==1.10.1
sympy==1.12.1 ; python_version == "3.8"
sympy>=1.13.0 ; python_version >= "3.9"
sympy==1.11.1
unittest-xml-reporting<=3.2.0,>=2.0.0
xdoctest==1.1.0
filelock==3.6.0
sympy==1.11.1
pytest-cpp==2.3.0
rockset==1.0.3
z3-solver==4.12.2.0
tensorboard==2.13.0
optree==0.12.1
optree==0.11.0
# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
# which the stringify metadata is wrong when escaping double quote
protobuf==3.20.2

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3
import os
import shutil
import sys
@ -8,17 +7,12 @@ from subprocess import check_call
from tempfile import TemporaryDirectory
from typing import Optional
SCRIPT_DIR = Path(__file__).parent
REPO_DIR = SCRIPT_DIR.parent.parent
def read_triton_pin(device: str = "cuda") -> str:
triton_file = "triton.txt"
if device == "rocm":
triton_file = "triton-rocm.txt"
elif device == "xpu":
triton_file = "triton-xpu.txt"
def read_triton_pin(rocm_hash: bool = False) -> str:
triton_file = "triton.txt" if not rocm_hash else "triton-rocm.txt"
with open(REPO_DIR / ".ci" / "docker" / "ci_commit_pins" / triton_file) as f:
return f.read().strip()
@ -55,7 +49,7 @@ def build_triton(
version: str,
commit_hash: str,
build_conda: bool = False,
device: str = "cuda",
build_rocm: bool = False,
py_version: Optional[str] = None,
release: bool = False,
) -> Path:
@ -75,14 +69,11 @@ def build_triton(
triton_basedir = Path(tmpdir) / "triton"
triton_pythondir = triton_basedir / "python"
triton_repo = "https://github.com/openai/triton"
if device == "rocm":
if build_rocm:
triton_pkg_name = "pytorch-triton-rocm"
elif device == "xpu":
triton_pkg_name = "pytorch-triton-xpu"
triton_repo = "https://github.com/intel/intel-xpu-backend-for-triton"
else:
triton_pkg_name = "pytorch-triton"
check_call(["git", "clone", triton_repo, "triton"], cwd=tmpdir)
check_call(["git", "clone", triton_repo], cwd=tmpdir)
if release:
ver, rev, patch = version.split(".")
check_call(
@ -149,7 +140,7 @@ def build_triton(
expected_version=None,
)
if device == "rocm":
if build_rocm:
check_call(
[f"{SCRIPT_DIR}/amd/package_triton_wheel.sh"],
cwd=triton_basedir,
@ -164,7 +155,7 @@ def build_triton(
whl_path = next(iter((triton_pythondir / "dist").glob("*.whl")))
shutil.copy(whl_path, Path.cwd())
if device == "rocm":
if build_rocm:
check_call(
[f"{SCRIPT_DIR}/amd/patch_triton_wheel.sh", Path.cwd()],
cwd=triton_basedir,
@ -179,19 +170,17 @@ def main() -> None:
parser = ArgumentParser("Build Triton binaries")
parser.add_argument("--release", action="store_true")
parser.add_argument("--build-conda", action="store_true")
parser.add_argument(
"--device", type=str, default="cuda", choices=["cuda", "rocm", "xpu"]
)
parser.add_argument("--build-rocm", action="store_true")
parser.add_argument("--py-version", type=str)
parser.add_argument("--commit-hash", type=str)
parser.add_argument("--triton-version", type=str, default=read_triton_version())
args = parser.parse_args()
build_triton(
device=args.device,
build_rocm=args.build_rocm,
commit_hash=args.commit_hash
if args.commit_hash
else read_triton_pin(args.device),
else read_triton_pin(args.build_rocm),
version=args.triton_version,
build_conda=args.build_conda,
py_version=args.py_version,

View File

@ -5,6 +5,7 @@ import sys
from typing import Any
from github_utils import gh_delete_comment, gh_post_pr_comment
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from label_utils import has_required_labels, is_label_err_comment, LABEL_ERR_MSG
from trymerge import GitHubPR

View File

@ -4,9 +4,11 @@ import json
import os
import re
from typing import Any, cast, Dict, List, Optional
from urllib.error import HTTPError
from github_utils import gh_fetch_url, gh_post_pr_comment, gh_query_issues_by_labels
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from trymerge import get_pr_commit_sha, GitHubPR

View File

@ -10,7 +10,6 @@ import requests
import rockset # type: ignore[import]
from gitutils import retries_decorator
LOGS_QUERY = """
with
shas as (

View File

@ -1,12 +1,10 @@
#!/usr/bin/env python3
import sys
from pathlib import Path
from typing import Any, cast, Dict, List, Set
import yaml
GITHUB_DIR = Path(__file__).parent.parent

View File

@ -1,6 +1,7 @@
import json
import subprocess
import sys
from enum import Enum
from pathlib import Path
from typing import NamedTuple, Optional

View File

@ -9,7 +9,6 @@ from typing import Any, Callable, Dict, List, Set
from github_utils import gh_fetch_json_dict, gh_graphql
from gitutils import GitRepo
SEC_IN_DAY = 24 * 60 * 60
CLOSED_PR_RETENTION = 30 * SEC_IN_DAY
NO_PR_RETENTION = 1.5 * 365 * SEC_IN_DAY

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import sys
from pathlib import Path
import yaml

View File

@ -14,6 +14,7 @@ import json
from typing import Any
import boto3 # type: ignore[import]
from label_utils import gh_get_labels

View File

@ -15,7 +15,6 @@ from urllib.request import Request, urlopen
import yaml
REENABLE_TEST_REGEX = "(?i)(Close(d|s)?|Resolve(d|s)?|Fix(ed|es)?) (#|https://github.com/pytorch/pytorch/issues/)([0-9]+)"
PREFIX = "test-config/"

View File

@ -8,13 +8,11 @@ architectures:
* CPU
* Latest CUDA
* Latest ROCM
* Latest XPU
"""
import os
from typing import Dict, List, Optional, Tuple
CUDA_ARCHES = ["11.8", "12.1", "12.4"]
@ -26,7 +24,6 @@ CUDA_ARCHES_CUDNN_VERSION = {"11.8": "9", "12.1": "9", "12.4": "9"}
ROCM_ARCHES = ["6.0", "6.1"]
XPU_ARCHES = ["xpu"]
CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
@ -135,8 +132,6 @@ def arch_type(arch_version: str) -> str:
return "cuda"
elif arch_version in ROCM_ARCHES:
return "rocm"
elif arch_version in XPU_ARCHES:
return "xpu"
elif arch_version in CPU_CXX11_ABI_ARCH:
return "cpu-cxx11-abi"
elif arch_version in CPU_AARCH64_ARCH:
@ -161,7 +156,6 @@ WHEEL_CONTAINER_IMAGES = {
gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in ROCM_ARCHES
},
"xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}",
"cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
"cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
"cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
@ -227,7 +221,6 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
"cuda": f"cu{gpu_arch_version.replace('.', '')}",
"cuda-aarch64": "cu124",
"rocm": f"rocm{gpu_arch_version}",
"xpu": "xpu",
}.get(gpu_arch_type, gpu_arch_version)
@ -332,13 +325,13 @@ def generate_wheels_matrix(
package_type = "manywheel"
if python_versions is None:
python_versions = FULL_PYTHON_VERSIONS + ["3.13"]
python_versions = FULL_PYTHON_VERSIONS
if arches is None:
# Define default compute archivectures
arches = ["cpu"]
if os == "linux":
arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES
elif os == "windows":
arches += CUDA_ARCHES
elif os == "linux-aarch64":
@ -361,16 +354,9 @@ def generate_wheels_matrix(
or arch_version == "cpu-aarch64"
or arch_version == "cpu-s390x"
or arch_version == "cuda-aarch64"
or arch_version == "xpu"
else arch_version
)
# TODO: Enable python 3.13 on rocm, xpu, aarch64, windows
if (
gpu_arch_type in ["rocm", "xpu"] or os != "linux"
) and python_version == "3.13":
continue
# 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
if (
arch_version in ["12.4", "12.1", "11.8"]
@ -410,7 +396,9 @@ def generate_wheels_matrix(
gpu_arch_type, gpu_arch_version
),
"use_split_build": "True",
"devtoolset": "",
"devtoolset": (
"cxx11-abi" if arch_version == "cuda-aarch64" else ""
),
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": (
@ -423,26 +411,6 @@ def generate_wheels_matrix(
),
}
)
# Special build building to use on Colab. PyThon 3.10 for 12.1 CUDA
if python_version == "3.10" and arch_version == "12.1":
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"use_split_build": "False",
"devtoolset": "",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": "",
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace( # noqa: B950
".", "_"
),
}
)
else:
ret.append(
{
@ -453,9 +421,7 @@ def generate_wheels_matrix(
gpu_arch_type, gpu_arch_version
),
"devtoolset": (
"cxx11-abi"
if arch_version in ["cpu-cxx11-abi", "xpu"]
else ""
"cxx11-abi" if arch_version == "cpu-cxx11-abi" else ""
),
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,

View File

@ -8,8 +8,8 @@ from typing import Dict, Iterable, List, Literal, Set
from typing_extensions import TypedDict # Python 3.11+
import generate_binary_build_matrix # type: ignore[import]
import jinja2
import jinja2
Arch = Literal["windows", "linux", "macos"]

View File

@ -16,7 +16,6 @@ from typing import Dict, List
import generate_binary_build_matrix
DOCKER_IMAGE_TYPES = ["runtime", "devel"]

View File

@ -4,11 +4,11 @@ import argparse
import os
import re
import subprocess
from datetime import datetime
from distutils.util import strtobool
from pathlib import Path
LEADING_V_PATTERN = re.compile("^v")
TRAILING_RC_PATTERN = re.compile("-rc[0-9]*$")
LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile("a0$")

View File

@ -11,6 +11,7 @@ import sys
import time
import urllib
import urllib.parse
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.request import Request, urlopen

View File

@ -3,6 +3,7 @@
import json
import os
import warnings
from dataclasses import dataclass
from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union
from urllib.error import HTTPError

View File

@ -19,7 +19,6 @@ from typing import (
Union,
)
T = TypeVar("T")
RE_GITHUB_URL_MATCH = re.compile("^https://.*@?github.com/(.+)/(.+)$")

View File

@ -1,12 +1,12 @@
"""GitHub Label Utilities."""
import json
from functools import lru_cache
from typing import Any, List, Tuple, TYPE_CHECKING, Union
from github_utils import gh_fetch_url_and_headers, GitHubComment
# TODO: this is a temp workaround to avoid circular dependencies,
# and should be removed once GitHubPR is refactored out of trymerge script.
if TYPE_CHECKING:

View File

@ -9,7 +9,6 @@ from pytest_caching_utils import (
upload_pytest_cache,
)
TEMP_DIR = "./tmp" # a backup location in case one isn't provided

View File

@ -14,7 +14,6 @@ from file_io_utils import (
zip_folder,
)
PYTEST_CACHE_KEY_PREFIX = "pytest_cache"
PYTEST_CACHE_DIR_NAME = ".pytest_cache"
BUCKET = "gha-artifacts"

View File

@ -12,7 +12,6 @@ from github.Issue import Issue
WORKFLOW_LABEL_META = "" # use meta runners
WORKFLOW_LABEL_LF = "lf." # use runners from the linux foundation
WORKFLOW_LABEL_LF_CANARY = "lf.c." # use canary runners from the linux foundation
GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "")
GH_OUTPUT_KEY_LABEL_TYPE = "label-type"
@ -204,10 +203,6 @@ def main() -> None:
)
label_type = WORKFLOW_LABEL_META
# For Canary builds use canary runners
if args.github_repo == "pytorch/pytorch-canary" and label_type == WORKFLOW_LABEL_LF:
label_type = WORKFLOW_LABEL_LF_CANARY
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, label_type)

View File

@ -2,7 +2,7 @@
set -eoux pipefail
SYNC_BRANCH=pytorch-stable-prototype
SYNC_BRANCH=fbcode/pytorch-stable-prototype
git config user.email "fake@example.com"
git config user.name "PyTorch Stable Bot"
@ -11,9 +11,7 @@ git fetch origin main
git fetch origin "$SYNC_BRANCH"
git checkout "$SYNC_BRANCH"
# Using a hardcoded SHA here is a massive speedup as we can skip the entire history of the pytorch GitHub repo.
# This specific SHA was chosen as it was before the "branch point" of the stable branch
for SHA in $(git log ba3b05fdf37ddbc3c301294d6a560a816335e717..origin/main --pretty="%h" -- torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed)
for SHA in $(git log 4333e122d4b74cdf84351ed2907045c6a767b4cd..origin/main --pretty="%h" --reverse -- torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed)
do
# `git merge-base --is-ancestor` exits with code 0 if the given SHA is an ancestor, and non-0 otherwise
if git merge-base --is-ancestor $SHA HEAD || [[ $(git log --grep="(cherry picked from commit $SHA") ]]
@ -22,12 +20,7 @@ do
continue
fi
echo "Copying $SHA"
git cherry-pick -x "$SHA" -X theirs
git reset --soft HEAD~1
git add torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed
git checkout .
git commit --reuse-message=HEAD@{1}
git clean -f
git cherry-pick -x "$SHA"
done
if [[ "${WITH_PUSH}" == true ]]; then

View File

@ -41,7 +41,7 @@ def main() -> None:
)
options = parser.parse_args()
tagged_images: Dict[str, bool] = {}
tagged_images: Dict[str, bool] = dict()
platform_images = [
generate_binary_build_matrix.WHEEL_CONTAINER_IMAGES,
generate_binary_build_matrix.LIBTORCH_CONTAINER_IMAGES,

View File

@ -7,7 +7,6 @@ cd llm-target-determinator
pip install -q -r requirements.txt
cd ../codellama
pip install -e .
pip install numpy==1.26.0
# Run indexer
cd ../llm-target-determinator

View File

@ -17,7 +17,9 @@ from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError
from github_utils import gh_graphql
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from trymerge import (
categorize_checks,
DRCI_CHECKRUN_NAME,
@ -37,7 +39,6 @@ from trymerge import (
validate_revert,
)
if "GIT_REMOTE_URL" not in os.environ:
os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"

View File

@ -45,6 +45,7 @@ from github_utils import (
gh_update_pr_state,
GitHubComment,
)
from gitutils import (
are_ghstack_branches_in_sync,
get_git_remote_name,
@ -61,7 +62,6 @@ from label_utils import (
)
from trymerge_explainer import get_revert_message, TryMergeExplainer
# labels
MERGE_IN_PROGRESS_LABEL = "merging"
MERGE_COMPLETE_LABEL = "merged"
@ -1459,7 +1459,7 @@ def find_matching_merge_rule(
if not skip_internal_checks and pr.has_internal_changes():
raise RuntimeError(
"This PR has internal changes and must be landed via Phabricator! Please try reimporting/rexporting the PR!"
"This PR has internal changes and must be landed via Phabricator"
)
# Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here

View File

@ -11,7 +11,6 @@ from github_utils import gh_post_pr_comment as gh_post_comment
from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
from trymerge import GitHubPR
SAME_SHA_ERROR = (
"\n```\nAborting rebase because rebasing the branch resulted in the same sha as the target branch.\n"
+ "This usually happens because the PR has already been merged. Please rebase locally and push.\n```"

View File

@ -81,7 +81,7 @@ jobs:
!{{ config["build_name"] }}-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: !{{ config["build_name"] }}-build
{%- if config["gpu_arch_type"] not in ["rocm", "xpu"] %}
{%- if config["gpu_arch_type"] != "rocm" %}
uses: ./.github/workflows/_binary-test-linux.yml
with:!{{ upload.binary_env_as_input(config) }}
build_name: !{{ config["build_name"] }}
@ -101,40 +101,6 @@ jobs:
{%- endif %}
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
{%- elif config["gpu_arch_type"] == "xpu" %}
runs-on: linux.idc.xpu
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config) }}
permissions:
id-token: write
contents: read
steps:
- name: Setup XPU
uses: ./.github/actions/setup-xpu
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v1.7.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- uses: !{{ common.download_artifact_action }}
name: Download Build Artifacts
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: !{{ config["container_image"] }}
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown XPU
uses: ./.github/actions/teardown-xpu
{%- else %}
runs-on: linux.rocm.gpu
timeout-minutes: !{{ common.timeout_minutes }}

105
.github/workflows/_linux-build-rg.yml vendored Normal file
View File

@ -0,0 +1,105 @@
name: linux-build-rg
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
docker-image-name:
required: true
type: string
description: Name of the base docker image to build with.
build-generates-artifacts:
required: false
type: boolean
default: true
description: If set, upload generated build artifacts.
build-with-debug:
required: false
type: boolean
default: false
description: If set, build in debug mode.
sync-tag:
required: false
type: string
default: ""
description: |
If this is set, our linter will use this to make sure that every other
job with the same `sync-tag` is identical.
cuda-arch-list:
required: false
type: string
default: "5.2"
description: |
List of CUDA architectures CI build should target.
runner-group:
required: false
type: string
default: "arc-lf-linux.2xlarge"
description: Runner group to select group type
test-matrix:
required: false
type: string
description: |
An option JSON description of what test configs to run later on. This
is moved here from the Linux test workflow so that we can apply filter
logic using test-config labels earlier and skip unnecessary builds
s3-bucket:
description: S3 bucket to download artifact
required: false
type: string
default: "gha-artifacts"
aws-role-to-assume:
description: role to assume for downloading artifacts
required: false
type: string
default: ""
secrets:
HUGGING_FACE_HUB_TOKEN:
required: false
description: |
HF Auth token to avoid rate limits when downloading models or datasets from hub
outputs:
docker-image:
value: ${{ jobs.build.outputs.docker-image }}
description: The docker image containing the built PyTorch.
test-matrix:
value: ${{ jobs.build.outputs.test-matrix }}
description: An optional JSON description of what test configs to run later on.
jobs:
build:
# Don't run on forked repos
if: github.repository_owner == 'pytorch'
runs-on:
group: ${{ inputs.runner-group }}
timeout-minutes: 240
outputs:
docker-image: ${{ steps.linux-build.outputs.docker-image }}
test-matrix: ${{ steps.linux-build.outputs.test-matrix }}
steps:
# [pytorch repo ref]
# Use a pytorch/pytorch reference instead of a reference to the local
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
- name: Linux Build
id: linux-build
uses: ./.github/actions/linux-build
with:
build-environment: ${{ inputs.build-environment }}
docker-image-name: ${{ inputs.docker-image-name }}
build-generates-artifacts: ${{ inputs.build-generates-artifacts }}
build-with-debug: ${{ inputs.build-with-debug }}
sync-tag: ${{ inputs.sync-tag }}
cuda-arch-list: ${{ inputs.cuda-arch-list }}
test-matrix: ${{ inputs.test-matrix }}
s3-bucket: ${{ inputs.s3-bucket }}
aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

View File

@ -39,7 +39,7 @@ on:
type: string
default: "linux.2xlarge"
description: |
Label of the runner this job should run on.
List of CUDA architectures CI build should target.
test-matrix:
required: false
type: string
@ -228,7 +228,7 @@ jobs:
- name: Store PyTorch Build Artifacts on S3
uses: seemethere/upload-artifact-s3@v5
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build != 'true'
with:
name: ${{ inputs.build-environment }}
retention-days: 14
@ -236,9 +236,9 @@ jobs:
path: artifacts.zip
s3-bucket: ${{ inputs.s3-bucket }}
- name: Store PyTorch Build Artifacts on S3 for split build
- name: Store PyTorch Build Artifacts on S3
uses: seemethere/upload-artifact-s3@v5
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build == 'true'
with:
name: ${{ inputs.build-environment }}-experimental-split-build
retention-days: 14

85
.github/workflows/_linux-test-label.yml vendored Normal file
View File

@ -0,0 +1,85 @@
name: linux-test-rg
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
test-matrix:
required: true
type: string
description: JSON description of what test configs to run.
docker-image:
required: true
type: string
description: Docker image to run in.
sync-tag:
required: false
type: string
default: ""
description: |
If this is set, our linter will use this to make sure that every other
job with the same `sync-tag` is identical.
timeout-minutes:
required: false
type: number
default: 240
description: |
Set the maximum (in minutes) how long the workflow should take to finish
use-gha:
required: false
type: string
default: ""
description: If set to any value, upload to GHA. Otherwise upload to S3.
dashboard-tag:
required: false
type: string
default: ""
s3-bucket:
description: S3 bucket to download artifact
required: false
type: string
default: "gha-artifacts"
aws-role-to-assume:
description: role to assume for downloading artifacts
required: false
type: string
default: ""
secrets:
HUGGING_FACE_HUB_TOKEN:
required: false
description: |
HF Auth token to avoid rate limits when downloading models or datasets from hub
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
jobs:
test:
# Don't run on forked repos or empty test matrix
if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
strategy:
matrix: ${{ fromJSON(inputs.test-matrix) }}
fail-fast: false
runs-on: ${{ matrix.runner }}
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
- name: Linux Test
id: linux-test
uses: ./.github/actions/linux-test
with:
build-environment: ${{ inputs.build-environment }}
test-matrix: ${{ inputs.test-matrix }}
docker-image: ${{ inputs.docker-image }}
sync-tag: ${{ inputs.sync-tag }}
use-gha: ${{ inputs.use-gha }}
dashboard-tag: ${{ inputs.dashboard-tag }}
s3-bucket: ${{ inputs.s3-bucket }}
aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

86
.github/workflows/_linux-test-rg.yml vendored Normal file
View File

@ -0,0 +1,86 @@
name: linux-test-label
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
test-matrix:
required: true
type: string
description: JSON description of what test configs to run.
docker-image:
required: true
type: string
description: Docker image to run in.
sync-tag:
required: false
type: string
default: ""
description: |
If this is set, our linter will use this to make sure that every other
job with the same `sync-tag` is identical.
timeout-minutes:
required: false
type: number
default: 240
description: |
Set the maximum (in minutes) how long the workflow should take to finish
use-gha:
required: false
type: string
default: ""
description: If set to any value, upload to GHA. Otherwise upload to S3.
dashboard-tag:
required: false
type: string
default: ""
s3-bucket:
description: S3 bucket to download artifact
required: false
type: string
default: "gha-artifacts"
aws-role-to-assume:
description: role to assume for downloading artifacts
required: false
type: string
default: ""
secrets:
HUGGING_FACE_HUB_TOKEN:
required: false
description: |
HF Auth token to avoid rate limits when downloading models or datasets from hub
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
jobs:
test:
# Don't run on forked repos or empty test matrix
if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
strategy:
matrix: ${{ fromJSON(inputs.test-matrix) }}
fail-fast: false
runs-on:
group: ${{ matrix.runner }}
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
- name: Linux Test
id: linux-test
uses: ./.github/actions/linux-test
with:
build-environment: ${{ inputs.build-environment }}
test-matrix: ${{ inputs.test-matrix }}
docker-image: ${{ inputs.docker-image }}
sync-tag: ${{ inputs.sync-tag }}
use-gha: ${{ inputs.use-gha }}
dashboard-tag: ${{ inputs.dashboard-tag }}
s3-bucket: ${{ inputs.s3-bucket }}
aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@ -54,7 +54,6 @@ jobs:
# Hardcoding below is temporary for testing ALI runners
# This file below should match the script found in .github/scripts/runner_determinator.py
- name: Hardcode runner-determinator script
id: hardcode-script
run: |
cat <<EOF > runner_determinator.py
# flake8: noqa: G004
@ -71,7 +70,6 @@ jobs:
WORKFLOW_LABEL_META = "" # use meta runners
WORKFLOW_LABEL_LF = "lf." # use runners from the linux foundation
WORKFLOW_LABEL_LF_CANARY = "lf.c." # use canary runners from the linux foundation
GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "")
GH_OUTPUT_KEY_LABEL_TYPE = "label-type"
@ -263,12 +261,9 @@ jobs:
)
label_type = WORKFLOW_LABEL_META
# For Canary builds use canary runners
if args.github_repo == "pytorch/pytorch-canary" and label_type == WORKFLOW_LABEL_LF:
label_type = WORKFLOW_LABEL_LF_CANARY
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, label_type)
if __name__ == "__main__":
main()
EOF

View File

@ -30,12 +30,6 @@ on:
An option JSON description of what test configs to run later on. This
is moved here from the Linux test workflow so that we can apply filter
logic using test-config labels earlier and skip unnecessary builds
runner:
required: false
type: string
default: "windows.4xlarge.nonephemeral"
description: |
Label of the runner this job should run on.
outputs:
test-matrix:
@ -49,7 +43,7 @@ jobs:
build:
# Don't run on forked repos.
if: github.repository_owner == 'pytorch'
runs-on: ${{ inputs.runner }}
runs-on: [self-hosted, windows.4xlarge.nonephemeral]
timeout-minutes: 240
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}

View File

@ -188,7 +188,7 @@ jobs:
run: |
pushd "${PYTORCH_FINAL_PACKAGE_DIR}"
# shellcheck disable=SC2046,SC2102
python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.12.1
python3 -mpip install $(echo *.whl)[opt-einsum,optree]
popd
.ci/pytorch/win-test.sh

Some files were not shown because too many files have changed in this diff Show More