Compare commits

1 commit

Commit 2d757f6517 ("apply"), 2025-02-18 11:24:34 -08:00

1825 changed files with 37236 additions and 94611 deletions

View File

@ -20,7 +20,7 @@ cd /
# on the mounted pytorch repo
git config --global --add safe.directory /pytorch
pip install -r /pytorch/requirements.txt
pip install auditwheel==6.2.0
pip install auditwheel
if [ "$DESIRED_CUDA" = "cpu" ]; then
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
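Note on the flag referenced in the comment above: a minimal sketch of enabling it, assuming USE_PRIORITIZED_TEXT_FOR_LD is consumed from the environment by the build scripts in the linked PR (not shown in this diff):

    export USE_PRIORITIZED_TEXT_FOR_LD=1   # assumption: read by the wheel build scripts
    python setup.py bdist_wheel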

View File

@ -39,7 +39,7 @@ def build_ArmComputeLibrary() -> None:
"clone",
"https://github.com/ARM-software/ComputeLibrary.git",
"-b",
"v25.02",
"v24.09",
"--depth",
"1",
"--shallow-submodules",
@ -99,14 +99,10 @@ def update_wheel(wheel_path, desired_cuda) -> None:
if "126" in desired_cuda:
libs_to_copy += [
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6",
"/usr/local/cuda/lib64/libcufile.so.0",
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
]
elif "128" in desired_cuda:
libs_to_copy += [
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
"/usr/local/cuda/lib64/libcufile.so.0",
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
]
else:
libs_to_copy += [
@ -208,7 +204,7 @@ if __name__ == "__main__":
else:
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
elif branch.startswith(("v1.", "v2.")):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
if enable_mkldnn:
build_ArmComputeLibrary()
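A worked example of the version-extraction expression changed above (the branch name is hypothetical):

    # branch[1:branch.find('-')] drops the leading 'v' and the '-rcN' suffix
    branch="v2.6.0-rc1"
    python -c "branch='$branch'; print(branch[1:branch.find('-')])"   # prints 2.6.0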

View File

@ -19,11 +19,13 @@ import boto3
# AMI images for us-east-1, change the following based on your ~/.aws/config
os_amis = {
"ubuntu18_04": "ami-078eece1d8119409f", # login_name: ubuntu
"ubuntu20_04": "ami-052eac90edaa9d08f", # login_name: ubuntu
"ubuntu22_04": "ami-0c6c29c5125214c77", # login_name: ubuntu
"redhat8": "ami-0698b90665a2ddcf1", # login_name: ec2-user
}
ubuntu18_04_ami = os_amis["ubuntu18_04"]
ubuntu20_04_ami = os_amis["ubuntu20_04"]
@ -327,7 +329,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None
]
)
host.run_cmd(
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}"
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.09 {git_clone_flags}"
)
host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
@ -657,6 +659,18 @@ def configure_system(
"sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip"
)
host.run_cmd("pip3 install dataclasses typing-extensions")
# Install and switch to gcc-8 on Ubuntu-18.04
if not host.using_docker() and host.ami == ubuntu18_04_ami and compiler == "gcc-8":
host.run_cmd("sudo apt-get install -y g++-8 gfortran-8")
host.run_cmd(
"sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100"
)
host.run_cmd(
"sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100"
)
host.run_cmd(
"sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 100"
)
if not use_conda:
print("Installing Cython + numpy from PyPy")
host.run_cmd("sudo pip3 install Cython")
@ -747,7 +761,7 @@ def start_build(
version = host.check_output("cat pytorch/version.txt").strip()[:-2]
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
if branch.startswith(("v1.", "v2.")):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
if enable_mkldnn:

View File

@ -1,8 +1,4 @@
#!/bin/bash
# The purpose of this script is to:
# 1. Extract the set of parameters to be used for a docker build based on the provided image name.
# 2. Run docker build with the parameters found in step 1.
# 3. Run the built image and print out the expected and actual versions of packages installed.
set -ex
@ -99,12 +95,13 @@ fi
# configuration, so we hardcode everything here rather than do it
# from scratch
case "$image" in
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11)
CUDA_VERSION=12.6.3
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
CUDA_VERSION=12.4.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
@ -118,6 +115,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
@ -132,6 +130,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
@ -146,61 +145,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.13
GCC_VERSION=9
PROTOBUF=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9)
CUDA_VERSION=12.6.3
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.6.3
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.6.3
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
PROTOBUF=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks)
CUDA_VERSION=12.6.3
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.13
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
@ -215,6 +160,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
@ -226,6 +172,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
CLANG_VERSION=10
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
ONNX=yes
@ -234,7 +181,10 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
CLANG_VERSION=10
PROTOBUF=yes
DB=yes
VISION=yes
VULKAN_SDK_VERSION=1.2.162.1
SWIFTSHADER=yes
CONDA_CMAKE=yes
TRITON=yes
;;
@ -242,7 +192,10 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.11
CLANG_VERSION=10
PROTOBUF=yes
DB=yes
VISION=yes
VULKAN_SDK_VERSION=1.2.162.1
SWIFTSHADER=yes
CONDA_CMAKE=yes
TRITON=yes
;;
@ -250,6 +203,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
TRITON=yes
@ -258,6 +212,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=6.2.4
NINJA_VERSION=1.9.0
@ -272,6 +227,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=6.3
NINJA_VERSION=1.9.0
@ -286,6 +242,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
XPU_VERSION=0.5
NINJA_VERSION=1.9.0
@ -296,6 +253,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
XPU_VERSION=2025.0
NINJA_VERSION=1.9.0
@ -306,6 +264,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
CONDA_CMAKE=yes
@ -319,6 +278,7 @@ case "$image" in
CUDNN_VERSION=9
CLANG_VERSION=12
PROTOBUF=yes
DB=yes
VISION=yes
TRITON=yes
;;
@ -326,6 +286,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
CLANG_VERSION=12
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
TRITON=yes
@ -346,6 +307,7 @@ case "$image" in
ANACONDA_PYTHON_VERSION=3.9
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
CONDA_CMAKE=yes
@ -360,7 +322,7 @@ case "$image" in
EXECUTORCH=yes
;;
pytorch-linux-jammy-py3.12-halide)
CUDA_VERSION=12.6
CUDA_VERSION=12.4
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
CONDA_CMAKE=yes
@ -368,7 +330,7 @@ case "$image" in
TRITON=yes
;;
pytorch-linux-jammy-py3.12-triton-cpu)
CUDA_VERSION=12.6
CUDA_VERSION=12.4
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=11
CONDA_CMAKE=yes
@ -378,19 +340,20 @@ case "$image" in
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
# We will need to update mypy version eventually, but that's for another day. The task
# would be to upgrade mypy to 1.0.0 with Python 3.11
PYTHON_VERSION=3.9
PIP_CMAKE=yes
ANACONDA_PYTHON_VERSION=3.9
CONDA_CMAKE=yes
;;
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter)
PYTHON_VERSION=3.9
ANACONDA_PYTHON_VERSION=3.9
CUDA_VERSION=11.8
PIP_CMAKE=yes
CONDA_CMAKE=yes
;;
pytorch-linux-jammy-aarch64-py3.10-gcc11)
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
ACL=yes
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping llvm src build install because the current version
@ -402,6 +365,7 @@ case "$image" in
GCC_VERSION=11
ACL=yes
PROTOBUF=yes
DB=yes
VISION=yes
CONDA_CMAKE=yes
# snadampal: skipping llvm src build install because the current version
@ -412,6 +376,7 @@ case "$image" in
*)
# Catch-all for builds that are not hardcoded.
PROTOBUF=yes
DB=yes
VISION=yes
echo "image '$image' did not match an existing build configuration"
if [[ "$image" == *py* ]]; then
@ -467,6 +432,7 @@ docker build \
--build-arg "BUILD_ENVIRONMENT=${image}" \
--build-arg "PROTOBUF=${PROTOBUF:-}" \
--build-arg "LLVMDEV=${LLVMDEV:-}" \
--build-arg "DB=${DB:-}" \
--build-arg "VISION=${VISION:-}" \
--build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
--build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
@ -474,12 +440,13 @@ docker build \
--build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
--build-arg "CLANG_VERSION=${CLANG_VERSION}" \
--build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
--build-arg "GCC_VERSION=${GCC_VERSION}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
--build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
--build-arg "SWIFTSHADER=${SWIFTSHADER}" \
--build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
--build-arg "KATEX=${KATEX:-}" \
@ -489,7 +456,6 @@ docker build \
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
--build-arg "PIP_CMAKE=${PIP_CMAKE}" \
--build-arg "TRITON=${TRITON}" \
--build-arg "TRITON_CPU=${TRITON_CPU}" \
--build-arg "ONNX=${ONNX}" \

View File

@ -55,6 +55,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./

View File

@ -1 +1 @@
01a22b6f16d117454b7d21ebdc691b0785b84a7f
5e4d6b6380d575e48e37e9d987fded4ec588e7bc

View File

@ -1 +1 @@
v2.26.2-1
v2.25.1-1

View File

@ -1 +1 @@
83111ab22be6e4a588d184ac45175986a7dde9fc
e98b6fcb8df5b44eb0d0addb6767c573d37ba024

View File

@ -1 +1 @@
96316ce50fade7e209553aba4898cd9b82aab83b
4b3bb1f8da0ded6ccd572dd1358ef45af5a1befe

View File

@ -1,6 +1,6 @@
set -euo pipefail
readonly version=v25.02
readonly version=v24.04
readonly src_host=https://github.com/ARM-software
readonly src_repo=ComputeLibrary

View File

@ -4,10 +4,16 @@ set -ex
if [ -n "$CLANG_VERSION" ]; then
if [[ $UBUNTU_VERSION == 22.04 ]]; then
if [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
sudo apt-get update
# gpg-agent is not available by default on 18.04
sudo apt-get install -y --no-install-recommends gpg-agent
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
# work around ubuntu apt-get conflicts
sudo apt-get -y -f install
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
if [[ $CLANG_VERSION == 18 ]]; then
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
fi
@ -35,7 +41,7 @@ if [ -n "$CLANG_VERSION" ]; then
# clang's packaging is a little messed up (the runtime libs aren't
# added into the linker path), so give it a little help
clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
echo "$clang_lib" >/etc/ld.so.conf.d/clang.conf
echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
ldconfig
# Cleanup package manager

View File

@ -66,7 +66,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
if [[ $(uname -m) == "aarch64" ]]; then
conda_install "openblas==0.3.29=*openmp*"
conda_install "openblas==0.3.28=*openmp*"
else
conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
fi

View File

@ -240,7 +240,7 @@ function prune_126 {
}
function install_128 {
CUDNN_VERSION=9.8.0.87
CUDNN_VERSION=9.7.1.26
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
# install CUDA 12.8.0 in the same container

View File

@ -3,8 +3,19 @@
set -ex
NCCL_VERSION=v2.26.2-1
CUDNN_VERSION=9.8.0.87
NCCL_VERSION=v2.21.5-1
CUDNN_VERSION=9.5.1.17
function install_cusparselt_062 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_cusparselt_063 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
@ -17,7 +28,140 @@ function install_cusparselt_063 {
rm -rf tmp_cusparselt
}
function install_124 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.1 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
chmod +x cuda_12.4.1_550.54.15_linux_sbsa.run
./cuda_12.4.1_550.54.15_linux_sbsa.run --toolkit --silent
rm -f cuda_12.4.1_550.54.15_linux_sbsa.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl
install_cusparselt_063
ldconfig
}
function prune_124 {
echo "Pruning CUDA 12.4"
#####################################################################################
# CUDA 12.4 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.4 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}
function install_126 {
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
# install CUDA 12.6.3 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run
chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run
./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent
rm -f cuda_12.6.3_560.35.05_linux_sbsa.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl
install_cusparselt_063
ldconfig
}
function prune_126 {
echo "Pruning CUDA 12.6"
#####################################################################################
# CUDA 12.6 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.6 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.6/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}
function install_128 {
CUDNN_VERSION=9.7.1.26
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
# install CUDA 12.8.0 in the same container
@ -54,6 +198,10 @@ function install_128 {
while test $# -gt 0
do
case "$1" in
12.4) install_124; prune_124
;;
12.6) install_126; prune_126
;;
12.8) install_128;
;;
*) echo "bad argument $1"; exit 1

View File

@ -5,7 +5,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
mkdir tmp_cudnn
pushd tmp_cudnn
if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.8.0.87_cuda12-archive"
CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then

.ci/docker/common/install_db.sh (new executable file, +38 lines)
View File

@ -0,0 +1,38 @@
#!/bin/bash
set -ex
install_ubuntu() {
apt-get update
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}
install_centos() {
# Need EPEL for many packages we depend on.
# See http://fedoraproject.org/wiki/EPEL
yum --enablerepo=extras install -y epel-release
# Cleanup
yum clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}
# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac
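The OS detection used by the new script can be exercised on its own; the same grep works on any distro that ships /etc/os-release:

    grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"'   # prints e.g. ubuntu or centos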

View File

@ -53,7 +53,7 @@ setup_executorch() {
export EXECUTORCH_BUILD_PYBIND=ON
export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
as_jenkins .ci/scripts/setup-linux.sh cmake || true
popd
}

View File

@ -2,6 +2,8 @@
set -ex
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
if [ -n "${UBUNTU_VERSION}" ]; then
apt update
apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
@ -13,8 +15,8 @@ chown -R jenkins pytorch
pushd pytorch
# Install all linter dependencies
pip install -r requirements.txt
lintrunner init
pip_install -r requirements.txt
conda_run lintrunner init
# Cache .lintbin directory as part of the Docker image
cp -r .lintbin /tmp

View File

@ -4,15 +4,10 @@ set -ex
[ -n "$NINJA_VERSION" ]
arch=$(uname -m)
if [ "$arch" == "aarch64" ]; then
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux-aarch64.zip"
else
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
fi
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
pushd /tmp
wget --no-verbose --output-document=ninja-linux.zip "$url"
unzip ninja-linux.zip -d /usr/local/bin
rm -f ninja-linux.zip
popd
popd

View File

@ -32,7 +32,7 @@ pip_install coloredlogs packaging
pip_install onnxruntime==1.18.1
pip_install onnx==1.17.0
pip_install onnxscript==0.2.2 --no-deps
pip_install onnxscript==0.1.0 --no-deps
# required by onnxscript
pip_install ml_dtypes

View File

@ -4,7 +4,7 @@
set -ex
cd /
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.29 --depth 1 --shallow-submodules
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.28 --depth 1 --shallow-submodules
OPENBLAS_BUILD_FLAGS="

View File

@ -1,18 +0,0 @@
#!/bin/bash
set -ex
apt-get update
# Use deadsnakes in case we need an older python version
sudo add-apt-repository ppa:deadsnakes/ppa
apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python3-pip python${PYTHON_VERSION}-venv
# Use a venv because uv and some other package managers don't support --user install
ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
python -m venv /var/lib/jenkins/ci_env
source /var/lib/jenkins/ci_env/bin/activate
python -mpip install --upgrade pip
python -mpip install -r /opt/requirements-ci.txt
if [ -n "${PIP_CMAKE}" ]; then
python -mpip install cmake==3.31.6
fi

View File

@ -8,6 +8,10 @@ ver() {
install_ubuntu() {
apt-get update
if [[ $UBUNTU_VERSION == 18.04 ]]; then
# gpg-agent is not available by default on 18.04
apt-get install -y --no-install-recommends gpg-agent
fi
if [[ $UBUNTU_VERSION == 20.04 ]]; then
# gpg-agent is not available by default on 20.04
apt-get install -y --no-install-recommends gpg-agent

View File

@ -25,9 +25,7 @@ python3 -m pip install meson ninja
###########################
### clone repo
###########################
# TEMPORARY FIX: https://gitlab.freedesktop.org/mesa/drm.git is down until 2025/03/22
# GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
GIT_SSL_NO_VERIFY=true git clone git://anongit.freedesktop.org/mesa/drm
GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
pushd drm
###########################
@ -117,7 +115,7 @@ index a5007ffc..13fa07fc 100644
if (!fp) {
- fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
- strerror(errno));
+ //fprintf(stderr, "amdgpu.ids: No such file or directory\n");
+ fprintf(stderr, "amdgpu.ids: No such file or directory\n");
return;
}

View File

@ -0,0 +1,24 @@
#!/bin/bash
set -ex
[ -n "${SWIFTSHADER}" ]
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
# SwiftShader
_swiftshader_dir=/var/lib/jenkins/swiftshader
_swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz
mkdir -p $_swiftshader_dir
_tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}"
curl --silent --show-error --location --fail --retry 3 \
--output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}"
tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}"
export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json"
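The retry helper defined in this script is not exercised below (the download relies on curl's built-in --retry instead); a sketch of how it would be used:

    retry wget -q "${_https_amazon_aws}/${_swiftshader_file_targz}"   # sleeps 1/2/4/8s between attempts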

View File

@ -60,15 +60,15 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}"
# Triton needs at least gcc-9 to build
apt-get install -y g++-9
CXX=g++-9 pip_install .
CXX=g++-9 pip_install -e .
elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
# Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
add-apt-repository -y ppa:ubuntu-toolchain-r/test
apt-get install -y g++-9
CXX=g++-9 pip_install .
CXX=g++-9 pip_install -e .
else
pip_install .
pip_install -e .
fi
if [ -n "${CONDA_CMAKE}" ]; then

View File

@ -0,0 +1,24 @@
#!/bin/bash
set -ex
[ -n "${VULKAN_SDK_VERSION}" ]
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
_vulkansdk_dir=/var/lib/jenkins/vulkansdk
_tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz
curl \
--silent \
--show-error \
--location \
--fail \
--retry 3 \
--output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
mkdir -p "${_vulkansdk_dir}"
tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
rm -rf "${_tmp_vulkansdk_targz}"

View File

@ -39,7 +39,7 @@ case ${GPU_ARCH_TYPE} in
BASE_TARGET=rocm
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx942"
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
;;
*)

View File

@ -18,14 +18,15 @@ COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG PYTHON_VERSION
ARG PIP_CMAKE
# Put venv into the env vars so users don't need to activate it
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
COPY requirements-ci.txt /opt/requirements-ci.txt
COPY ./common/install_python.sh install_python.sh
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_magma_conda.sh install_magma_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Install cuda and cudnn
ARG CUDA_VERSION
@ -36,10 +37,9 @@ ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
# Note that Docker build forbids copying file outside the build context
COPY ./common/install_linter.sh install_linter.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_linter.sh
RUN rm install_linter.sh
RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env
RUN rm install_linter.sh common_utils.sh
USER jenkins
CMD ["bash"]

View File

@ -15,18 +15,20 @@ COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG PYTHON_VERSION
ARG PIP_CMAKE
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
COPY requirements-ci.txt /opt/requirements-ci.txt
COPY ./common/install_python.sh install_python.sh
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Note that Docker build forbids copying file outside the build context
COPY ./common/install_linter.sh install_linter.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_linter.sh
RUN rm install_linter.sh
RUN rm install_linter.sh common_utils.sh
USER jenkins
CMD ["bash"]

View File

@ -0,0 +1,153 @@
# syntax = docker/dockerfile:experimental
ARG ROCM_VERSION=3.7
ARG BASE_CUDA_VERSION=10.2
ARG GPU_IMAGE=nvidia/cuda:${BASE_CUDA_VERSION}-devel-centos7
FROM quay.io/pypa/manylinux2014_x86_64 as base
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel
RUN yum install -y yum-utils centos-release-scl sudo
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
# cmake
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
# remove unncessary python versions
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
FROM base as cuda
ARG BASE_CUDA_VERSION=10.2
# Install CUDA
ADD ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
FROM base as intel
# MKL
ADD ./common/install_mkl.sh install_mkl.sh
RUN bash ./install_mkl.sh && rm install_mkl.sh
FROM base as magma
ARG BASE_CUDA_VERSION=10.2
# Install magma
ADD ./common/install_magma.sh install_magma.sh
RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
FROM base as jni
# Install java jni header
ADD ./common/install_jni.sh install_jni.sh
ADD ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh
FROM base as libpng
# Install libpng
ADD ./common/install_libpng.sh install_libpng.sh
RUN bash ./install_libpng.sh && rm install_libpng.sh
FROM ${GPU_IMAGE} as common
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8
RUN yum install -y \
aclocal \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
file \
git \
make \
patch \
perl \
unzip \
util-linux \
wget \
which \
xz \
yasm
RUN yum install -y \
https://repo.ius.io/ius-release-el7.rpm \
https://ossci-linux.s3.amazonaws.com/epel-release-7-14.noarch.rpm
RUN yum swap -y git git236-core
# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
# Override this behaviour by treating every folder as safe
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
# Install LLVM version
COPY --from=openssl /opt/openssl /opt/openssl
COPY --from=base /opt/python /opt/python
COPY --from=base /opt/_internal /opt/_internal
COPY --from=base /usr/local/bin/auditwheel /usr/local/bin/auditwheel
COPY --from=intel /opt/intel /opt/intel
COPY --from=base /usr/local/bin/patchelf /usr/local/bin/patchelf
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
COPY --from=libpng /usr/local/include/png* /usr/local/include/
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
FROM common as cpu_final
ARG BASE_CUDA_VERSION=10.2
RUN yum install -y yum-utils centos-release-scl
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
# cmake
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
# ninja
RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-1.noarch.rpm
RUN yum install -y ninja-build
FROM cpu_final as cuda_final
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
FROM common as rocm_final
ARG ROCM_VERSION=3.7
# Install ROCm
ADD ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
# cmake3 is needed for the later MIOpen custom build, so that step is last.
RUN yum install -y cmake3 && \
rm -f /usr/bin/cmake && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh

View File

@ -38,12 +38,6 @@ RUN yum install -y \
sudo \
gcc-toolset-${GCCTOOLSET_VERSION}-toolchain
# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh
# Ensure the expected devtoolset is used
ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH

View File

@ -42,7 +42,6 @@ RUN yum install -y \
llvm-devel \
libzstd-devel \
python3.12-devel \
python3.12-test \
python3.12-setuptools \
python3.12-pip \
python3-virtualenv \
@ -102,33 +101,24 @@ CMD ["/bin/bash"]
# install test dependencies:
# - grpcio requires system openssl, bundled crypto fails to build
# - ml_dtypes 0.4.0 requires some fixes provided in later commits to build
RUN dnf install -y \
protobuf-devel \
protobuf-c-devel \
protobuf-lite-devel \
hdf5-devel \
python3-h5py \
git
wget \
patch
RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
# cmake-3.28.0 from pip for onnxruntime
RUN python3 -mpip install cmake==3.28.0
# build onnxruntime 1.21.0 from sources.
# it is not possible to build it from sources using pip,
# so just build it from upstream repository.
# h5py is dependency of onnxruntime_training.
# h5py==3.11.0 builds with hdf5-devel 1.10.5 from repository.
# install newest flatbuffers version first:
# for some reason old version is getting pulled in otherwise.
# packaging package is required for onnxruntime wheel build.
RUN pip3 install flatbuffers && \
pip3 install h5py==3.11.0 && \
pip3 install packaging && \
git clone https://github.com/microsoft/onnxruntime && \
cd onnxruntime && git checkout v1.21.0 && \
RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio==1.65.4
RUN cd ~ && \
git clone https://github.com/jax-ml/ml_dtypes && \
cd ml_dtypes && \
git checkout v0.4.0 && \
git submodule update --init --recursive && \
./build.sh --config Release --parallel 0 --enable_pybind --build_wheel --enable_training --enable_training_apis --enable_training_ops --skip_tests --allow_running_as_root && \
pip3 install ./build/Linux/Release/dist/onnxruntime_training-*.whl && \
cd .. && /bin/rm -rf ./onnxruntime
wget https://github.com/jax-ml/ml_dtypes/commit/b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
wget https://github.com/jax-ml/ml_dtypes/commit/d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
patch -p1 < b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
patch -p1 < d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
python3 setup.py bdist_wheel && \
pip3 install dist/*.whl && \
rm -rf ml_dtypes

View File

@ -48,7 +48,7 @@ case ${GPU_ARCH_TYPE} in
TARGET=final
DOCKER_TAG=cpu-aarch64
GPU_IMAGE=arm64v8/almalinux:8
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11 --build-arg NINJA_VERSION=1.12.1"
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
MANY_LINUX_VERSION="2_28_aarch64"
;;
cpu-cxx11-abi)
@ -97,7 +97,7 @@ case ${GPU_ARCH_TYPE} in
DEVTOOLSET_VERSION="11"
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
fi
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101"
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
;;
xpu)
@ -121,8 +121,7 @@ fi
(
set -x
# Only activate this if in CI
if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
if [ "$(uname -m)" != "s390x" ]; then
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
@ -140,7 +139,7 @@ fi
"${TOPDIR}/.ci/docker/"
)
GITHUB_REF=${GITHUB_REF:-"dev")}
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
GIT_BRANCH_NAME=${GITHUB_REF##*/}
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
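The ${GITHUB_REF##*/} expansion above is plain bash suffix-pattern stripping, keeping only the text after the last '/':

    GITHUB_REF="refs/heads/main"
    echo "${GITHUB_REF##*/}"   # prints: main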

View File

@ -3,7 +3,7 @@
# Script used only in CD pipeline
OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/
CURL_DOWNLOAD_URL=https://curl.se/download
CURL_DOWNLOAD_URL=https://curl.askapache.com/download
AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf

View File

@ -41,14 +41,11 @@ fbscribelogger==0.1.7
#Pinned versions: 0.1.6
#test that import:
flatbuffers==2.0 ; platform_machine != "s390x"
flatbuffers==2.0
#Description: cross platform serialization library
#Pinned versions: 2.0
#test that import:
flatbuffers ; platform_machine == "s390x"
#Description: cross platform serialization library; Newer version is required on s390x for new python version
hypothesis==5.35.1
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
#Description: advanced library for generating parametrized tests
@ -93,10 +90,10 @@ librosa>=0.6.2 ; python_version < "3.11"
#Pinned versions:
#test that import:
mypy==1.14.0
mypy==1.13.0
# Pin MyPy version because new errors are likely to appear with each release
#Description: linter
#Pinned versions: 1.14.0
#Pinned versions: 1.10.0
#test that import: test_typing.py, test_type_hints.py
networkx==2.8.8
@ -105,10 +102,10 @@ networkx==2.8.8
#Pinned versions: 2.8.8
#test that import: functorch
ninja==1.11.1.3
#Description: build system. Used in some tests. Used in build to generate build
#time tracing information
#Pinned versions: 1.11.1.3
#ninja
#Description: build system. Note that it install from
#here breaks things so it is commented out
#Pinned versions: 1.10.0.post1
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
numba==0.49.0 ; python_version < "3.9"
@ -297,7 +294,7 @@ ghstack==0.8.0
#Pinned versions: 0.8.0
#test that import:
jinja2==3.1.6
jinja2==3.1.5
#Description: jinja2 template engine
#Pinned versions: 3.1.4
#test that import:
@ -342,7 +339,7 @@ onnx==1.17.0
#Pinned versions:
#test that import:
onnxscript==0.2.2
onnxscript==0.1.0
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:
@ -368,6 +365,7 @@ PyYAML
pyzstd
setuptools
ninja==1.11.1 ; platform_machine == "aarch64"
scons==4.5.2 ; platform_machine == "aarch64"
pulp==2.9.0 ; python_version >= "3.8"
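The `; platform_machine == "aarch64"` suffixes in this file are PEP 508 environment markers, which pip evaluates against the running interpreter; a quick sketch:

    python -c "import platform; print(platform.machine())"        # e.g. x86_64
    pip install 'ninja==1.11.1 ; platform_machine == "aarch64"'   # skipped on x86_64 with an "Ignoring" notice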

View File

@ -1 +1 @@
3.3.0
3.2.0

View File

@ -50,6 +50,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./

View File

@ -50,6 +50,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./

View File

@ -77,6 +77,13 @@ COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-xpu.txt triton_version.txt
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./

View File

@ -74,6 +74,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
@ -81,6 +88,18 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}
# (optional) Install Vulkan SDK
ARG VULKAN_SDK_VERSION
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
RUN rm install_vulkan_sdk.sh
# (optional) Install swiftshader
ARG SWIFTSHADER
COPY ./common/install_swiftshader.sh install_swiftshader.sh
RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
RUN rm install_swiftshader.sh
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh

View File

@ -12,7 +12,7 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \
-e DESIRED_CUDA=${DESIRED_CUDA} \
-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \
"pytorch/manylinux2_28-builder:cuda${DESIRED_CUDA}-main" \
"pytorch/manylinux-builder:cuda${DESIRED_CUDA}-main" \
magma/build_magma.sh
.PHONY: all

View File

@ -111,6 +111,12 @@ case ${DESIRED_PYTHON} in
;;
esac
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
export _GLIBCXX_USE_CXX11_ABI=1
else
export _GLIBCXX_USE_CXX11_ABI=0
fi
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
echo "Calling build_amd.py at $(date)"
python tools/amd_build/build_amd.py
@ -203,6 +209,12 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then
mkdir -p /tmp/$LIBTORCH_HOUSE_DIR
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
LIBTORCH_ABI="cxx11-abi-"
else
LIBTORCH_ABI=
fi
zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
/tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip

View File

@ -54,11 +54,11 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
case ${CUDA_VERSION} in
12.8)
TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX" #removing sm_50-sm_70 as these architectures are deprecated in CUDA 12.8 and will be removed in future releases
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0;10.0;12.0+PTX" #Ripping out 5.0 and 6.0 due to ld error
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
12.6)
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
12.4)

View File

@ -95,6 +95,12 @@ python setup.py clean
retry pip install -qr requirements.txt
retry pip install -q numpy==2.0.1
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
export _GLIBCXX_USE_CXX11_ABI=1
else
export _GLIBCXX_USE_CXX11_ABI=0
fi
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
echo "Calling build_amd.py at $(date)"
python tools/amd_build/build_amd.py
@ -163,6 +169,12 @@ fi
)
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
LIBTORCH_ABI="cxx11-abi-"
else
LIBTORCH_ABI=
fi
(
set -x

View File

@ -173,7 +173,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
source /opt/intel/oneapi/compiler/latest/env/vars.sh
# XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA
export USE_KINETO=0
export TORCH_XPU_ARCH_LIST=pvc
fi
# sccache will fail for CUDA builds if all cores are used for compiling
@ -192,7 +191,7 @@ fi
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
# memory to build and will OOM
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]] && [ -z "$MAX_JOBS_OVERRIDE" ]; then
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then
echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM"
echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage"
export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
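For concreteness, on a hypothetical 16-CPU runner the formula above gives (16 - 2) / 3 = 4 parallel jobs:

    echo $(( $(nproc --ignore=2) / 3 ))   # nproc --ignore=2 reports the CPU count minus two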
@ -378,10 +377,8 @@ else
# This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
# is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
# 16 CPUs
if [ -z "$MAX_JOBS_OVERRIDE" ]; then
MAX_JOBS=$(nproc --ignore=4)
export MAX_JOBS
fi
MAX_JOBS=$(nproc --ignore=4)
export MAX_JOBS
# NB: Install outside of source directory (at the same level as the root
# pytorch folder) so that it doesn't get cleaned away prior to docker push.

View File

@ -73,14 +73,26 @@ fi
# Check GCC ABI
###############################################################################
# NOTE: As of https://github.com/pytorch/pytorch/issues/126551 we only produce
# wheels with cxx11-abi
# NOTE [ Building libtorch with old vs. new gcc ABI ]
#
# Packages built with one version of ABI could not be linked against by client
# C++ libraries that were compiled using the other version of ABI. Since both
# gcc ABIs are still common in the wild, we need to support both ABIs. Currently:
#
# - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI.
# - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI.
echo "Checking that the gcc ABI is what we expect"
if [[ "$(uname)" != 'Darwin' ]]; then
function is_expected() {
if [[ "$1" -gt 0 || "$1" == "ON " ]]; then
echo 1
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* || "$DESIRED_CUDA" == *"rocm"* ]]; then
if [[ "$1" -gt 0 || "$1" == "ON " ]]; then
echo 1
fi
else
if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then
echo 1
fi
fi
}
@ -196,11 +208,35 @@ setup_link_flags () {
TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code"
build_and_run_example_cpp () {
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
GLIBCXX_USE_CXX11_ABI=1
else
GLIBCXX_USE_CXX11_ABI=0
fi
setup_link_flags
g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
./$1
}
build_example_cpp_with_incorrect_abi () {
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
GLIBCXX_USE_CXX11_ABI=0
else
GLIBCXX_USE_CXX11_ABI=1
fi
set +e
setup_link_flags
g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
ERRCODE=$?
set -e
if [ "$ERRCODE" -eq "0" ]; then
echo "Building example with incorrect ABI didn't throw error. Aborting."
exit 1
else
echo "Building example with incorrect ABI throws expected error. Proceeding."
fi
}
###############################################################################
# Check simple Python/C++ calls
###############################################################################
@ -210,6 +246,11 @@ if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
fi
build_and_run_example_cpp simple-torch-test
# `_GLIBCXX_USE_CXX11_ABI` is always ignored by gcc in devtoolset7, so we test
# the expected failure case for Ubuntu 16.04 + gcc 5.4 only.
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
build_example_cpp_with_incorrect_abi simple-torch-test
fi
else
pushd /tmp
python -c 'import torch'
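A standalone sketch of the dual-ABI behavior these checks exercise (file names are made up): the same source compiled with the two _GLIBCXX_USE_CXX11_ABI values yields differently mangled std::string symbols, which is why the incorrect-ABI build above is expected to fail:

    printf '#include <string>\nstd::string greet() { return "hi"; }\n' > abi_probe.cpp
    g++ -c -D_GLIBCXX_USE_CXX11_ABI=1 abi_probe.cpp -o new_abi.o
    g++ -c -D_GLIBCXX_USE_CXX11_ABI=0 abi_probe.cpp -o old_abi.o
    nm new_abi.o | grep __cxx11   # mangled name contains std::__cxx11::basic_string
    nm old_abi.o | grep __cxx11 || echo "no __cxx11 symbols (pre-cxx11 ABI)"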

View File

@ -121,9 +121,9 @@ def main() -> None:
else:
install_root = Path(distutils.sysconfig.get_python_lib()) / "torch"
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
# NOTE: All binaries are built with cxx11abi now
check_lib_symbols_for_abi_correctness(libtorch_cpu_path, False)
libtorch_cpu_path = install_root / "lib" / "libtorch_cpu.so"
pre_cxx11_abi = "cxx11-abi" not in os.getenv("DESIRED_DEVTOOLSET", "")
check_lib_symbols_for_abi_correctness(libtorch_cpu_path, pre_cxx11_abi)
if __name__ == "__main__":
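The same property can be checked by hand on a built library with nm (library path assumed):

    nm -D libtorch_cpu.so | grep -c '__cxx11'   # a non-zero count indicates cxx11-abi symbols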

View File

@ -46,9 +46,7 @@ def train(args, model, device, train_loader, optimizer, epoch):
optimizer.step()
if batch_idx % args.log_interval == 0:
print(
f"Train Epoch: {epoch} "
f"[{batch_idx * len(data)}/{len(train_loader.dataset)} "
f"({100.0 * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}"
f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}" # noqa: B950
)
if args.dry_run:
break
@ -73,9 +71,7 @@ def test(model, device, test_loader):
test_loss /= len(test_loader.dataset)
print(
f"\nTest set: Average loss: {test_loss:.4f}, "
f"Accuracy: {correct}/{len(test_loader.dataset)} "
f"({100.0 * correct / len(test_loader.dataset):.0f}%)\n"
f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n" # noqa: B950
)

View File

@ -76,13 +76,10 @@ def read_release_matrix():
def test_numpy():
try:
import numpy as np
import numpy as np
x = np.arange(5)
torch.tensor(x)
except ImportError:
print("Numpy check skipped. Numpy is not installed.")
x = np.arange(5)
torch.tensor(x)
def check_version(package: str) -> None:
@ -169,10 +166,6 @@ def test_cuda_gds_errors_captured() -> None:
major_version = int(torch.version.cuda.split(".")[0])
minor_version = int(torch.version.cuda.split(".")[1])
if target_os == "windows":
print(f"{target_os} is not supported for GDS smoke test")
return
if major_version < 12 or (major_version == 12 and minor_version < 6):
print("CUDA version is not supported for GDS smoke test")
return
@ -413,7 +406,6 @@ def main() -> None:
smoke_test_conv2d()
test_linalg()
test_numpy()
if is_cuda_system:
test_linalg("cuda")
test_cuda_gds_errors_captured()

View File

@ -314,13 +314,6 @@ test_python() {
assert_git_not_dirty
}
test_lazy_tensor_meta_reference_disabled() {
export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
echo "Testing lazy tensor operations without meta reference"
time python test/run_test.py --include lazy/test_ts_opinfo.py --verbose
export -n TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE
}
test_dynamo_wrapped_shard() {
if [[ -z "$NUM_TEST_SHARDS" ]]; then
@ -483,8 +476,6 @@ elif [[ "${TEST_CONFIG}" == *aot_eager* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager)
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--export-aot-inductor)
elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--inductor --inductor-compile-mode max-autotune)
elif [[ "${TEST_CONFIG}" == *inductor* && "${TEST_CONFIG}" != *perf* ]]; then
DYNAMO_BENCHMARK_FLAGS+=(--inductor)
fi
@ -499,59 +490,6 @@ else
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
fi
test_cachebench() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
local BENCHMARK
if [[ "${SHARD_NUMBER}" == 1 ]]; then
local BENCHMARK=torchbench
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
local BENCHMARK=huggingface
else
echo "invalid SHARD_NUMBER: ${SHARD_NUMBER}"
exit 1
fi
local mode_options=("training" "inference")
for mode in "${mode_options[@]}"; do
$TASKSET python "benchmarks/dynamo/cachebench.py" \
--mode "$mode" \
--device cuda \
--benchmark "$BENCHMARK" \
--repeat 3 \
--output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}.json"
$TASKSET python "benchmarks/dynamo/cachebench.py" \
--mode "$mode" \
--dynamic \
--device cuda \
--benchmark "$BENCHMARK" \
--repeat 3 \
--output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}_dynamic.json"
done
}
test_verify_cachebench() {
TMP_TEST_REPORTS_DIR=$(mktemp -d)
TEST_OUTPUT="$TMP_TEST_REPORTS_DIR/test.json"
$TASKSET python "benchmarks/dynamo/cachebench.py" \
--mode training \
--device cpu \
--model nanogpt \
--benchmark torchbench \
--output "$TEST_OUTPUT"
    # -s checks that the file exists and is non-empty
if [[ ! -s "$TEST_OUTPUT" ]]; then
echo "Cachebench failed to produce an output."
echo "Run 'python benchmarks/dynamo/cachebench.py' to make sure it works"
exit 1
fi
}
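The shard-to-benchmark mapping and the mode/dynamic matrix above fit in a few lines of Python; a hedged sketch, not the CI function itself:

import subprocess

BENCHMARK_BY_SHARD = {1: "torchbench", 2: "huggingface"}

def run_cachebench(shard: int, reports_dir: str) -> None:
    benchmark = BENCHMARK_BY_SHARD[shard]  # a missing shard mirrors the "invalid SHARD_NUMBER" exit
    for mode in ("training", "inference"):
        for dynamic in (False, True):
            suffix = "_dynamic" if dynamic else ""
            cmd = ["python", "benchmarks/dynamo/cachebench.py", "--mode", mode]
            if dynamic:
                cmd.append("--dynamic")
            cmd += ["--device", "cuda", "--benchmark", benchmark, "--repeat", "3",
                    "--output", f"{reports_dir}/cachebench_{benchmark}_{mode}{suffix}.json"]
            subprocess.run(cmd, check=True)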
test_perf_for_dashboard() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
@ -580,8 +518,6 @@ test_perf_for_dashboard() {
test_inductor_set_cpu_affinity
elif [[ "${TEST_CONFIG}" == *cuda_a10g* ]]; then
device=cuda_a10g
elif [[ "${TEST_CONFIG}" == *h100* ]]; then
device=cuda_h100
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
device=rocm
fi
@ -762,8 +698,6 @@ test_dynamo_benchmark() {
fi
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
else
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
@ -1481,7 +1415,7 @@ test_executorch() {
bash examples/models/llama3_2_vision/install_requirements.sh
# NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch
# from the PR
bash .ci/scripts/setup-linux.sh --build-tool cmake
bash .ci/scripts/setup-linux.sh cmake
echo "Run ExecuTorch unit tests"
pytest -v -n auto
@ -1505,7 +1439,7 @@ test_executorch() {
test_linux_aarch64() {
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops \
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
# Dynamo tests
@ -1573,16 +1507,6 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
install_torchvision
id=$((SHARD_NUMBER-1))
test_dynamo_benchmark timm_models "$id"
elif [[ "${TEST_CONFIG}" == cachebench ]]; then
install_torchaudio cuda
install_torchvision
checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco
PYTHONPATH=$(pwd)/torchbench test_cachebench
elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
install_torchaudio cpu
install_torchvision
checkout_install_torchbench nanogpt
PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
install_torchaudio cpu
@ -1619,7 +1543,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
install_torchvision
checkout_install_torchbench hf_T5 llama moco
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
test_inductor_aoti
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
install_torchvision
test_inductor_shard "${SHARD_NUMBER}"
@ -1639,7 +1562,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
test_python_shard "$SHARD_NUMBER"
test_aten
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
test_lazy_tensor_meta_reference_disabled
test_without_numpy
install_torchvision
test_python_shard 1

View File

@ -17,24 +17,32 @@ curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
:: Install the Visual Studio Build Tools with C++ components
echo Installing Visual Studio Build Tools with C++ components...
echo Installing MSVC %MSVC_VERSION%
"%INSTALLER_FILE%" --norestart --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
--add Microsoft.VisualStudio.Workload.VCTools ^
--add Microsoft.VisualStudio.Component.Windows10SDK ^
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
--add Microsoft.VisualStudio.Component.VC.ASAN ^
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
--add Microsoft.VisualStudio.Component.VC.CoreBuildTools ^
--add Microsoft.VisualStudio.Component.VC.CoreIde ^
--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^
--add Microsoft.VisualStudio.Component.VC.Tools.ARM64EC ^
--add Microsoft.VisualStudio.Component.VC.Tools.ARM64 ^
--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64
echo exitcode = %errorlevel%
if "%MSVC_VERSION%" == "latest" (
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
--add Microsoft.VisualStudio.Component.VC.ASAN ^
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
--add Microsoft.VisualStudio.Component.VC.Tools.ARM64 ^
--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64
) else if "%MSVC_VERSION%" == "14.40" (
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
--add Microsoft.VisualStudio.Component.VC.ASAN ^
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
--add Microsoft.VisualStudio.Component.VC.14.40.17.10.ARM64 ^
--add Microsoft.VisualStudio.Component.VC.14.40.17.10.x86.x64
) else if "%MSVC_VERSION%" == "14.36" (
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
--add Microsoft.VisualStudio.Component.VC.ASAN ^
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
--add Microsoft.VisualStudio.Component.VC.14.36.17.6.ARM64 ^
--add Microsoft.VisualStudio.Component.VC.14.36.17.6.x86.x64
)
:: Check if installation was successful
if %errorlevel% neq 0 (
echo Failed to install Visual Studio Build Tools with C++ components.
echo "Failed to install Visual Studio Build Tools with C++ components. (exitcode = %errorlevel%)"
exit /b 1
)
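The branching above is essentially a version-to-component-list table; an illustrative Python rendering, using only component IDs that appear in the script:

COMMON_COMPONENTS = [
    "Microsoft.VisualStudio.Component.Windows11SDK.22621",
    "Microsoft.VisualStudio.Component.VC.ASAN",
    "Microsoft.VisualStudio.Component.VC.CMake.Project",
]
TOOLCHAIN_COMPONENTS = {
    "latest": [
        "Microsoft.VisualStudio.Component.VC.Tools.ARM64",
        "Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
    ],
    "14.40": [
        "Microsoft.VisualStudio.Component.VC.14.40.17.10.ARM64",
        "Microsoft.VisualStudio.Component.VC.14.40.17.10.x86.x64",
    ],
    "14.36": [
        "Microsoft.VisualStudio.Component.VC.14.36.17.6.ARM64",
        "Microsoft.VisualStudio.Component.VC.14.36.17.6.x86.x64",
    ],
}

def installer_args(msvc_version: str, install_path: str) -> list[str]:
    args = ["--norestart", "--nocache", "--quiet", "--wait", "--installPath", install_path]
    for component in COMMON_COMPONENTS + TOOLCHAIN_COMPONENTS[msvc_version]:
        args += ["--add", component]
    return args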

View File

@ -6,25 +6,22 @@ echo Dependency Python installation started.
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%
if "%DESIRED_PYTHON%" == "3.13" (
echo Python version is set to 3.13
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.13.2/python-3.13.2-arm64.exe
) else if "%DESIRED_PYTHON%" == "3.12" (
echo Python version is set to 3.12
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe
) else if "%DESIRED_PYTHON%" == "3.11" (
echo Python version is set to 3.11
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.11.9/python-3.11.9-arm64.exe
if "%PYTHON_VERSION%"=="Python312" (
echo Python version is set to Python312
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe"
) else if "%PYTHON_VERSION%"=="Python311" (
echo Python version is set to Python311
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.11.9/python-3.11.9-arm64.exe"
) else (
echo DESIRED_PYTHON not defined, Python version is set to 3.12
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe
echo PYTHON_VERSION not defined, Python version is set to Python312
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe"
)
set INSTALLER_FILE=%DOWNLOADS_DIR%\python-installer.exe
:: Download installer
echo Downloading Python...
curl -L -o "%INSTALLER_FILE%" "%DOWNLOAD_URL%"
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
:: Install Python
echo Installing Python...
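The equivalent lookup for the Python-installer branching, with the same 3.12 fallback as the `else` arm (URLs copied from the script):

PYTHON_INSTALLERS = {
    "Python312": "https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe",
    "Python311": "https://www.python.org/ftp/python/3.11.9/python-3.11.9-arm64.exe",
}

def installer_url(python_version: str) -> str:
    # Unset or unknown versions fall back to 3.12, like the batch script's else branch.
    return PYTHON_INSTALLERS.get(python_version, PYTHON_INSTALLERS["Python312"])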

View File

@ -14,7 +14,7 @@ where python
:: install dependencies
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest numpy protobuf expecttest hypothesis
pip install pytest numpy
:: find file name for pytorch wheel
for /f "delims=" %%f in ('dir /b "%PYTORCH_FINAL_PACKAGE_DIR%" ^| findstr "torch-"') do set "TORCH_WHEEL_FILENAME=%PYTORCH_FINAL_PACKAGE_DIR%\%%f"

View File

@ -1,6 +1,8 @@
@echo off
setlocal
set "ORIG_PATH=%PATH%"
if "%PACKAGE_TYPE%" == "wheel" goto wheel
if "%PACKAGE_TYPE%" == "libtorch" goto libtorch
@ -8,7 +10,21 @@ echo "unknown package type"
exit /b 1
:wheel
call %PYTORCH_ROOT%\.ci\pytorch\windows\arm64\bootstrap_tests.bat
echo "install wheel package"
echo Running pip install...
pip install -q --pre numpy protobuf
echo Error level after pip install: %ERRORLEVEL%
if errorlevel 1 exit /b 1
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i"
if errorlevel 1 exit /b 1
goto smoke_test
:smoke_test
python -c "import torch"
if ERRORLEVEL 1 exit /b 1
echo Running python rnn_smoke_win_arm64.py...
python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\rnn_smoke_win_arm64.py
@ -23,12 +39,10 @@ goto end
:libtorch
echo "install and test libtorch"
if not exist tmp mkdir tmp
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do C:\Windows\System32\tar.exe -xf "%%i" -C tmp
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do tar -xf "%%i" -C tmp
if ERRORLEVEL 1 exit /b 1
pushd tmp
pushd tmp\libtorch
set VC_VERSION_LOWER=14
set VC_VERSION_UPPER=36
@ -46,4 +60,6 @@ if ERRORLEVEL 1 exit /b 1
.\simple-torch-test.exe
if ERRORLEVEL 1 exit /b 1
:end
:end
set "PATH=%ORIG_PATH%"
popd

View File

@ -71,20 +71,11 @@ if "%DESIRED_PYTHON%" == "3.13" %PYTHON_EXEC% -m pip install --pre numpy==2.1.2
if "%DESIRED_PYTHON%" == "3.12" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
if "%DESIRED_PYTHON%" == "3.11" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
if "%DESIRED_PYTHON%" == "3.10" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf networkx
if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
if errorlevel 1 exit /b 1
if "%PYTORCH_BUILD_VERSION:dev=%" NEQ "%PYTORCH_BUILD_VERSION%" (
set "CHANNEL=nightly"
) else (
set "CHANNEL=test"
)
set "EXTRA_INDEX= "
if "%CUDA_VERSION%" == "xpu" set "EXTRA_INDEX=--index-url https://download.pytorch.org/whl/%CHANNEL%/xpu"
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do %PYTHON_EXEC% -m pip install "%%i" %EXTRA_INDEX%
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do %PYTHON_EXEC% -m pip install "%%i"
if errorlevel 1 exit /b 1
goto smoke_test
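The channel selection above keys off a "dev" marker in the build version; a compact sketch of the same logic:

def pip_extra_index(build_version: str, cuda_version: str) -> list[str]:
    # "dev" in the build version marks a nightly; anything else goes to "test".
    channel = "nightly" if "dev" in build_version else "test"
    if cuda_version == "xpu":
        return ["--index-url", f"https://download.pytorch.org/whl/{channel}/xpu"]
    return []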

View File

@ -47,9 +47,9 @@ set XPU_EXTRA_INSTALLED=0
set XPU_EXTRA_UNINSTALL=0
if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.0] (
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d6d6c17-ca2d-4735-9331-99447e4a1280/intel-deep-learning-essentials-2025.0.1.28_offline.exe
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/efc86abd-cb77-452e-a03f-a741895b8ece/intel-deep-learning-essentials-2025.0.0.336_offline.exe
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
set XPU_BUNDLE_VERSION=2025.0.1+20
set XPU_BUNDLE_VERSION=2025.0.0+335
set XPU_BUNDLE_INSTALLED=0
set XPU_BUNDLE_UNINSTALL=0
set XPU_EXTRA_URL=NULL

View File

@ -31,9 +31,9 @@ fi
export DOCKER_IMAGE=${DOCKER_IMAGE:-}
if [[ -z "$DOCKER_IMAGE" ]]; then
if [[ "$DESIRED_CUDA" == cpu ]]; then
export DOCKER_IMAGE="pytorch/manylinux2_28:cpu"
export DOCKER_IMAGE="pytorch/manylinux:cpu"
else
export DOCKER_IMAGE="pytorch/manylinux2_28-builder:${DESIRED_CUDA:2}"
export DOCKER_IMAGE="pytorch/manylinux-builder:${DESIRED_CUDA:2}"
fi
fi
@ -74,12 +74,6 @@ TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for all the wheel builds, hence append TRITON_CONSTRAINT
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
# CUDA 12.8 builds have triton for Linux and Linux aarch64 binaries.
if [[ "$DESIRED_CUDA" == cu128 ]]; then
TRITON_CONSTRAINT="platform_system == 'Linux'"
fi
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
@ -104,11 +98,11 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B
fi
# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton xpu package
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}"
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* && $(uname) == "Linux" ]]; then
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt)
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}"
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
fi
if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}"
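Putting the pieces above together: the triton requirement string is a version, an optional +git shorthash for dev builds, and an optional environment-marker constraint. A sketch, with an illustrative version number:

def triton_requirement(version: str, shorthash: str, constraint: str) -> str:
    req = f"pytorch-triton-xpu=={version}"
    if shorthash:                      # dev/nightly builds pin to a commit
        req += f"+git{shorthash[:8]}"  # cut -c1-8 equivalent
    if constraint:
        req += f"; {constraint}"
    return req

# triton_requirement("3.2.0", "f084f34bbb74", "platform_system == 'Linux'")
# -> "pytorch-triton-xpu==3.2.0+gitf084f34b; platform_system == 'Linux'"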

View File

@ -55,16 +55,12 @@ s3_upload() {
s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/"
fi
(
cache_control_flag=""
if [[ "${UPLOAD_CHANNEL}" = "test" ]]; then
cache_control_flag="--cache-control='no-cache,no-store,must-revalidate'"
fi
for pkg in ${PKG_DIR}/*.${extension}; do
(
set -x
shm_id=$(sha256sum "${pkg}" | awk '{print $1}')
${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" \
--metadata "checksum-sha256=${shm_id}" ${cache_control_flag}
--metadata "checksum-sha256=${shm_id}"
)
done
)
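The upload path computes a sha256 checksum per package and, on the test channel only, disables caching; the same logic as a Python sketch:

import hashlib

def upload_args(pkg_path: str, upload_channel: str) -> list[str]:
    with open(pkg_path, "rb") as f:
        shm_id = hashlib.sha256(f.read()).hexdigest()
    args = ["--no-progress", "--acl", "public-read",
            "--metadata", f"checksum-sha256={shm_id}"]
    if upload_channel == "test":
        args += ["--cache-control", "no-cache,no-store,must-revalidate"]
    return args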

View File

@ -1,22 +0,0 @@
#!/bin/bash
set -eux -o pipefail
source "${BINARY_ENV_FILE:-/c/w/env}"
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"
export USE_SCCACHE=1
export SCCACHE_IGNORE_SERVER_IO_ERROR=1
echo "Free space on filesystem before build:"
df -h
export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
pytorch/.ci/pytorch/windows/arm64/build_libtorch.bat
elif [[ "$PACKAGE_TYPE" == 'wheel' ]]; then
pytorch/.ci/pytorch/windows/arm64/build_pytorch.bat
fi
echo "Free space on filesystem after build:"
df -h

View File

@ -1,6 +0,0 @@
#!/bin/bash
set -eux -o pipefail
source "${BINARY_ENV_FILE:-/c/w/env}"
pytorch/.ci/pytorch/windows/arm64/smoke_test.bat

View File

@ -13,7 +13,6 @@ export VC_YEAR=2022
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
export USE_SCCACHE=0
export XPU_VERSION=2025.0
export XPU_ENABLE_KINETO=1
fi
echo "Free space on filesystem before build:"

View File

@ -12,7 +12,6 @@ bugprone-*,
-bugprone-macro-parentheses,
-bugprone-lambda-function-name,
-bugprone-reserved-identifier,
-bugprone-return-const-ref-from-parameter,
-bugprone-swapped-arguments,
clang-analyzer-core.*,
clang-analyzer-cplusplus.*,
@ -25,7 +24,6 @@ cppcoreguidelines-*,
-cppcoreguidelines-avoid-non-const-global-variables,
-cppcoreguidelines-interfaces-global-init,
-cppcoreguidelines-macro-usage,
-cppcoreguidelines-macro-to-enum,
-cppcoreguidelines-owning-memory,
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
-cppcoreguidelines-pro-bounds-constant-array-index,
@ -57,7 +55,6 @@ modernize-*,
-modernize-use-trailing-return-type,
-modernize-use-nodiscard,
performance-*,
-performance-enum-size,
readability-container-size-empty,
readability-delete-null-pointer,
readability-duplicate-include

View File

@ -38,7 +38,6 @@ per-file-ignores =
torchgen/api/types/__init__.py: F401,F403
torchgen/executorch/api/types/__init__.py: F401,F403
test/dynamo/test_higher_order_ops.py: B950
test/dynamo/test_error_messages.py: B950
torch/testing/_internal/dynamo_test_failures.py: B950
# TOR901 is only for test, we want to ignore it for everything else.
# It's not easy to configure this without affecting other per-file-ignores,

View File

@ -1,13 +1,8 @@
self-hosted-runner:
labels:
# GitHub hosted runner that actionlint doesn't recognize because actionlint version (1.6.21) is too old
- ubuntu-24.04
# GitHub hosted x86 Linux runners
# TODO: Clean up mentions of linux.20_04 when the upgrade to linux.24_04 is complete
- linux.20_04.4x
- linux.20_04.16x
- linux.24_04.4x
- linux.24_04.16x
# Organization-wide AWS Linux Runners
- linux.large
- linux.2xlarge
@ -15,6 +10,7 @@ self-hosted-runner:
- linux.9xlarge.ephemeral
- am2.linux.9xlarge.ephemeral
- linux.12xlarge
- linux.12xlarge.ephemeral
- linux.24xlarge
- linux.24xlarge.ephemeral
- linux.arm64.2xlarge
@ -46,13 +42,10 @@ self-hosted-runner:
- windows.8xlarge.nvidia.gpu
- windows.8xlarge.nvidia.gpu.nonephemeral
- windows.g5.4xlarge.nvidia.gpu
# Windows ARM64 runners
- windows-11-arm64
# Organization-wide AMD hosted runners
- linux.rocm.gpu
- linux.rocm.gpu.2
- linux.rocm.gpu.4
- rocm-docker
# Repo-specific Apple hosted runners
- macos-m1-ultra
- macos-m2-14

View File

@ -23,44 +23,9 @@ runs:
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Set up parallel fetch and clean workspace
id: first-clean
continue-on-error: true
- name: Clean workspace
shell: bash
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
env:
NO_SUDO: ${{ inputs.no-sudo }}
run: |
# Use all available CPUs for fetching
cd "${GITHUB_WORKSPACE}"
git config --global fetch.parallel 0
git config --global submodule.fetchJobs 0
# Clean workspace. The default checkout action should also do this, but
# do it here as well just in case
if [[ -d .git ]]; then
if [ -z "${NO_SUDO}" ]; then
sudo git clean -ffdx
else
git clean -ffdx
fi
fi
- name: Checkout PyTorch
id: first-checkout-attempt
continue-on-error: true
uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# --depth=1 for speed, manually fetch history and other refs as necessary
fetch-depth: ${{ inputs.fetch-depth }}
submodules: ${{ inputs.submodules }}
show-progress: false
- name: Clean workspace (try again)
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' &&
(steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success') }}
shell: bash
env:
NO_SUDO: ${{ inputs.no-sudo }}
run: |
@ -75,11 +40,11 @@ runs:
fi
mkdir "${GITHUB_WORKSPACE}"
- name: Checkout PyTorch (try again)
- name: Checkout PyTorch
uses: actions/checkout@v4
if: ${{ steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success' }}
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
# --depth=1 for speed, manually fetch history and other refs as necessary
fetch-depth: ${{ inputs.fetch-depth }}
submodules: ${{ inputs.submodules }}
show-progress: false
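The two-phase pattern above (optimistic clean and checkout with failures tolerated, then retry both only if either first step failed) is easier to see stripped of YAML; a sketch:

def checkout_with_retry(clean, checkout) -> None:
    ok = True
    for step in (clean, checkout):  # first attempts, continue-on-error
        try:
            step()
        except Exception:
            ok = False
    if not ok:                      # the "try again" steps
        clean()
        checkout()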

View File

@ -15,6 +15,7 @@ runs:
-e BINARY_ENV_FILE \
-e BUILD_ENVIRONMENT \
-e DESIRED_CUDA \
-e DESIRED_DEVTOOLSET \
-e DESIRED_PYTHON \
-e GITHUB_ACTIONS \
-e GPU_ARCH_TYPE \

View File

@ -1 +1 @@
318bace01aebc1f82ae13d0d133fcf9fede73383
f084f34bbb743fada85f66b0ed8041387565e69c

.github/labeler.yml
View File

@ -98,7 +98,7 @@
- test/distributed/**
- torch/testing/_internal/distributed/**
"release notes: distributed (checkpoint)":
"module: distributed_checkpoint":
- torch/distributed/checkpoint/**
- test/distributed/checkpoint/**

View File

@ -334,7 +334,6 @@
- XiaobingSuper
- jgong5
- mingfeima
- EikanWang
mandatory_checks_name:
- EasyCLA
- Lint
@ -367,7 +366,6 @@
- jgong5
- vfdev-5
- leslie-fang-intel
- EikanWang
mandatory_checks_name:
- EasyCLA
- Lint
@ -381,7 +379,6 @@
approved_by:
- leslie-fang-intel
- jgong5
- EikanWang
mandatory_checks_name:
- EasyCLA
- Lint

View File

@ -7,7 +7,6 @@ ciflow_push_tags:
- ciflow/inductor
- ciflow/inductor-periodic
- ciflow/inductor-rocm
- ciflow/inductor-perf-test-nightly-rocm
- ciflow/inductor-perf-compare
- ciflow/inductor-micro-benchmark
- ciflow/inductor-micro-benchmark-cpu-x86
@ -17,7 +16,6 @@ ciflow_push_tags:
- ciflow/nightly
- ciflow/periodic
- ciflow/rocm
- ciflow/rocm-mi300
- ciflow/s390
- ciflow/slow
- ciflow/trunk

View File

@ -5,7 +5,7 @@
# functorch/docs/requirements.txt
# .ci/docker/requirements-ci.txt
boto3==1.35.42
jinja2==3.1.6
jinja2==3.1.5
lintrunner==0.10.7
ninja==1.10.0.post1
nvidia-ml-py==11.525.84

View File

@ -123,7 +123,7 @@ def main() -> None:
parser = ArgumentParser("Build Triton binaries")
parser.add_argument("--release", action="store_true")
parser.add_argument(
"--device", type=str, default="cuda", choices=["cuda", "rocm", "xpu", "aarch64"]
"--device", type=str, default="cuda", choices=["cuda", "rocm", "xpu"]
)
parser.add_argument("--py-version", type=str)
parser.add_argument("--commit-hash", type=str)

View File

@ -16,15 +16,16 @@ from typing import Optional
# NOTE: Also update the CUDA sources in tools/nightly.py when changing this list
CUDA_ARCHES = ["11.8", "12.6", "12.8"]
CUDA_STABLE = "12.6"
CUDA_ARCHES = ["11.8", "12.4", "12.6", "12.8"]
CUDA_ARCHES_FULL_VERSION = {
"11.8": "11.8.0",
"12.4": "12.4.1",
"12.6": "12.6.3",
"12.8": "12.8.0",
}
CUDA_ARCHES_CUDNN_VERSION = {
"11.8": "9",
"12.4": "9",
"12.6": "9",
"12.8": "9",
}
@ -34,11 +35,13 @@ ROCM_ARCHES = ["6.2.4", "6.3"]
XPU_ARCHES = ["xpu"]
CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
CPU_AARCH64_ARCH = ["cpu-aarch64"]
CPU_S390X_ARCH = ["cpu-s390x"]
CUDA_AARCH64_ARCHES = ["12.8-aarch64"]
CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64"]
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
@ -55,6 +58,21 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'"
),
"12.4": (
"nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'"
),
"12.6": (
"nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
@ -66,7 +84,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
@ -75,30 +93,26 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
"nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==9.8.0.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | "
"nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'"
),
"xpu": (
"intel-cmplr-lib-rt==2025.0.4; platform_system == 'Linux' | "
"intel-cmplr-lib-ur==2025.0.4; platform_system == 'Linux' | "
"intel-cmplr-lic-rt==2025.0.4; platform_system == 'Linux' | "
"intel-sycl-rt==2025.0.4; platform_system == 'Linux' | "
"intel-cmplr-lib-rt==2025.0.5; platform_system == 'Windows' | "
"intel-cmplr-lib-ur==2025.0.5; platform_system == 'Windows' | "
"intel-cmplr-lic-rt==2025.0.5; platform_system == 'Windows' | "
"intel-sycl-rt==2025.0.5; platform_system == 'Windows' | "
"intel-cmplr-lib-rt==2025.0.2 | "
"intel-cmplr-lib-ur==2025.0.2 | "
"intel-cmplr-lic-rt==2025.0.2 | "
"intel-sycl-rt==2025.0.2 | "
"tcmlib==1.2.0 | "
"umf==0.9.1 | "
"intel-pti==0.10.1"
"intel-pti==0.10.0"
),
}
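Each entry above is a single '|'-joined string of PEP 508 requirements; consumers presumably split it back apart, along these lines:

def split_extra_install_requirements(spec: str) -> list[str]:
    # One "pkg==ver; marker" requirement per '|'-separated segment.
    return [req.strip() for req in spec.split("|")]

# e.g. split_extra_install_requirements(PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.6"])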
@ -144,6 +158,8 @@ def arch_type(arch_version: str) -> str:
return "rocm"
elif arch_version in XPU_ARCHES:
return "xpu"
elif arch_version in CPU_CXX11_ABI_ARCH:
return "cpu-cxx11-abi"
elif arch_version in CPU_AARCH64_ARCH:
return "cpu-aarch64"
elif arch_version in CPU_S390X_ARCH:
@ -172,23 +188,31 @@ WHEEL_CONTAINER_IMAGES = {
},
"xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}",
"cpu": f"pytorch/manylinux2_28-builder:cpu-{DEFAULT_TAG}",
"cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
"cpu-aarch64": f"pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
"cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
}
CXX11_ABI = "cxx11-abi"
RELEASE = "release"
DEBUG = "debug"
LIBTORCH_CONTAINER_IMAGES: dict[str, str] = {
LIBTORCH_CONTAINER_IMAGES: dict[tuple[str, str], str] = {
**{
gpu_arch: f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
(
gpu_arch,
CXX11_ABI,
): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in CUDA_ARCHES
},
**{
gpu_arch: f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
(
gpu_arch,
CXX11_ABI,
): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
for gpu_arch in ROCM_ARCHES
},
"cpu": f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
("cpu", CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
}
FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
@ -198,6 +222,7 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
return {
"cpu": "cpu",
"cpu-aarch64": "cpu",
"cpu-cxx11-abi": "cpu-cxx11-abi",
"cpu-s390x": "cpu",
"cuda": f"cu{gpu_arch_version.replace('.', '')}",
"cuda-aarch64": f"cu{gpu_arch_version.replace('-aarch64', '').replace('.', '')}",
@ -212,7 +237,7 @@ def list_without(in_list: list[str], without: list[str]) -> list[str]:
def generate_libtorch_matrix(
os: str,
release_type: str,
abi_version: str,
arches: Optional[list[str]] = None,
libtorch_variants: Optional[list[str]] = None,
) -> list[dict[str, str]]:
@ -221,8 +246,14 @@ def generate_libtorch_matrix(
if os == "linux":
arches += CUDA_ARCHES
arches += ROCM_ARCHES
# skip CUDA 12.8 builds for libtorch
if "12.8" in arches:
arches.remove("12.8")
elif os == "windows":
arches += CUDA_ARCHES
# skip CUDA 12.8 builds on Windows
if "12.8" in arches:
arches.remove("12.8")
if libtorch_variants is None:
libtorch_variants = [
"shared-with-deps",
@ -234,6 +265,9 @@ def generate_libtorch_matrix(
ret: list[dict[str, str]] = []
for arch_version in arches:
for libtorch_variant in libtorch_variants:
# one of the values in the following list must be exactly
# CXX11_ABI, but the precise value of the other one doesn't
# matter
gpu_arch_type = arch_type(arch_version)
gpu_arch_version = "" if arch_version == "cpu" else arch_version
# ROCm builds without-deps failed even in ROCm runners; skip for now
@ -246,15 +280,16 @@ def generate_libtorch_matrix(
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"libtorch_config": release_type,
"libtorch_variant": libtorch_variant,
"libtorch_config": abi_version if os == "windows" else "",
"devtoolset": abi_version if os != "windows" else "",
"container_image": (
LIBTORCH_CONTAINER_IMAGES[arch_version]
if os not in ("windows", "windows-arm64")
LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)]
if os != "windows"
else ""
),
"package_type": "libtorch",
"build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{release_type}".replace(
"build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
".", "_"
),
}
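Condensed shape of one matrix entry produced above (illustrative only; cpu/cuda handled, rocm elided):

def libtorch_entry(arch_version: str, abi_version: str, variant: str) -> dict[str, str]:
    gpu_arch_type = "cpu" if arch_version == "cpu" else "cuda"
    gpu_arch_version = "" if arch_version == "cpu" else arch_version
    return {
        "gpu_arch_type": gpu_arch_type,
        "gpu_arch_version": gpu_arch_version,
        "libtorch_variant": variant,
        "devtoolset": abi_version,
        "build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{variant}-{abi_version}".replace(".", "_"),
    }

# libtorch_entry("cpu", "cxx11-abi", "shared-with-deps")["build_name"]
# -> "libtorch-cpu-shared-with-deps-cxx11-abi"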
@ -280,9 +315,12 @@ def generate_wheels_matrix(
# Define default compute architectures
arches = ["cpu"]
if os == "linux":
arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
elif os == "windows":
arches += CUDA_ARCHES + XPU_ARCHES
# skip CUDA 12.8 builds on Windows until available
if "12.8" in arches:
arches.remove("12.8")
elif os == "linux-aarch64":
# Separate new if as the CPU type is different and
# uses different build/test scripts
@ -299,6 +337,7 @@ def generate_wheels_matrix(
gpu_arch_version = (
""
if arch_version == "cpu"
or arch_version == "cpu-cxx11-abi"
or arch_version == "cpu-aarch64"
or arch_version == "cpu-s390x"
or arch_version == "xpu"
@ -310,7 +349,7 @@ def generate_wheels_matrix(
continue
if use_split_build and (
arch_version not in ["12.6", "12.8", "11.8", "cpu"] or os != "linux"
arch_version not in ["12.6", "12.4", "11.8", "cpu"] or os != "linux"
):
raise RuntimeError(
"Split build is only supported on linux with cuda 12*, 11.8, and cpu.\n"
@ -321,26 +360,26 @@ def generate_wheels_matrix(
# cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
if (
arch_version in ["12.8", "12.6", "11.8"]
arch_version in ["12.8", "12.6", "12.4", "11.8"]
and os == "linux"
or arch_version in CUDA_AARCH64_ARCHES
):
desired_cuda = translate_desired_cuda(gpu_arch_type, gpu_arch_version)
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": desired_cuda,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"use_split_build": "True" if use_split_build else "False",
"devtoolset": "cxx11-abi",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": (
PYTORCH_EXTRA_INSTALL_REQUIREMENTS[
f"{desired_cuda[2:4]}.{desired_cuda[4:]}" # for cuda-aarch64: cu126 -> 12.6
]
if os == "linux-aarch64"
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]
PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]
if os != "linux-aarch64"
else ""
),
"build_name": (
f"{package_type}-py{python_version}-{gpu_arch_type}"
@ -350,8 +389,8 @@ def generate_wheels_matrix(
), # include special case for aarch64 build, remove the -aarch64 postfix
}
)
# Special build building to use on Colab. Python 3.11 for 12.6 CUDA
if python_version == "3.11" and arch_version == CUDA_STABLE:
# Special build building to use on Colab. Python 3.11 for 12.4 CUDA
if python_version == "3.11" and arch_version == "12.4":
ret.append(
{
"python_version": python_version,
@ -361,6 +400,7 @@ def generate_wheels_matrix(
gpu_arch_type, gpu_arch_version
),
"use_split_build": "True" if use_split_build else "False",
"devtoolset": "",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": "",
@ -379,6 +419,12 @@ def generate_wheels_matrix(
gpu_arch_type, gpu_arch_version
),
"use_split_build": "True" if use_split_build else "False",
"devtoolset": (
"cxx11-abi"
if (arch_version in ["cpu-cxx11-abi", "cpu-aarch64"])
or os == "linux"
else ""
),
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
@ -387,7 +433,7 @@ def generate_wheels_matrix(
"pytorch_extra_install_requirements": (
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"]
if gpu_arch_type == "xpu"
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[CUDA_STABLE]
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.4"]
if os != "linux"
else ""
),
@ -399,4 +445,5 @@ def generate_wheels_matrix(
validate_nccl_dep_consistency("12.8")
validate_nccl_dep_consistency("12.6")
validate_nccl_dep_consistency("12.4")
validate_nccl_dep_consistency("11.8")

View File

@ -54,6 +54,7 @@ class BinaryBuildWorkflow:
# Optional fields
build_environment: str = ""
abi_version: str = ""
ciflow_config: CIFlowConfig = field(default_factory=CIFlowConfig)
is_scheduled: str = ""
branches: str = "nightly"
@ -63,7 +64,12 @@ class BinaryBuildWorkflow:
use_split_build: bool = False
def __post_init__(self) -> None:
self.build_environment = f"{self.os}-binary-{self.package_type}"
if self.abi_version:
self.build_environment = (
f"{self.os}-binary-{self.package_type}-{self.abi_version}"
)
else:
self.build_environment = f"{self.os}-binary-{self.package_type}"
if self.use_split_build:
# added to distinguish concurrency groups
self.build_environment += "-split"
@ -90,7 +96,6 @@ class BinaryBuildWorkflow:
class OperatingSystem:
LINUX = "linux"
WINDOWS = "windows"
WINDOWS_ARM64 = "windows-arm64"
MACOS = "macos"
MACOS_ARM64 = "macos-arm64"
LINUX_AARCH64 = "linux-aarch64"
@ -127,9 +132,10 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="libtorch",
abi_version=generate_binary_build_matrix.CXX11_ABI,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.LINUX,
generate_binary_build_matrix.RELEASE,
generate_binary_build_matrix.CXX11_ABI,
libtorch_variants=["shared-with-deps"],
),
ciflow_config=CIFlowConfig(
@ -145,7 +151,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
package_type="manywheel",
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.LINUX,
arches=["11.8", "12.6", "12.8"],
arches=["11.8", "12.4", "12.6", "12.8"],
python_versions=["3.9"],
),
branches="main",
@ -169,9 +175,10 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="libtorch",
abi_version=generate_binary_build_matrix.CXX11_ABI,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.LINUX,
generate_binary_build_matrix.RELEASE,
generate_binary_build_matrix.CXX11_ABI,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
@ -194,6 +201,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.RELEASE,
@ -207,6 +215,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.DEBUG,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.DEBUG,
@ -223,6 +232,7 @@ WINDOWS_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.RELEASE,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.RELEASE,
@ -237,6 +247,7 @@ WINDOWS_BINARY_SMOKE_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS,
package_type="libtorch",
abi_version=generate_binary_build_matrix.DEBUG,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS,
generate_binary_build_matrix.DEBUG,
@ -250,57 +261,14 @@ WINDOWS_BINARY_SMOKE_WORKFLOWS = [
),
]
WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS_ARM64,
package_type="wheel",
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.WINDOWS_ARM64,
arches=["cpu"],
python_versions=["3.12"],
),
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
isolated_workflow=True,
),
),
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS_ARM64,
package_type="libtorch",
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS_ARM64,
generate_binary_build_matrix.RELEASE,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
isolated_workflow=True,
),
),
BinaryBuildWorkflow(
os=OperatingSystem.WINDOWS_ARM64,
package_type="libtorch",
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.WINDOWS_ARM64,
generate_binary_build_matrix.DEBUG,
arches=["cpu"],
libtorch_variants=["shared-with-deps"],
),
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
isolated_workflow=True,
),
),
]
MACOS_BINARY_BUILD_WORKFLOWS = [
BinaryBuildWorkflow(
os=OperatingSystem.MACOS_ARM64,
package_type="libtorch",
abi_version=generate_binary_build_matrix.CXX11_ABI,
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
OperatingSystem.MACOS,
generate_binary_build_matrix.RELEASE,
generate_binary_build_matrix.CXX11_ABI,
libtorch_variants=["shared-with-deps"],
),
cross_compile_arm64=False,
@ -387,10 +355,6 @@ def main() -> None:
jinja_env.get_template("windows_binary_build_workflow.yml.j2"),
WINDOWS_BINARY_SMOKE_WORKFLOWS,
),
(
jinja_env.get_template("windows_arm64_binary_build_workflow.yml.j2"),
WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS,
),
(
jinja_env.get_template("macos_binary_build_workflow.yml.j2"),
MACOS_BINARY_BUILD_WORKFLOWS,

View File

@ -1,30 +0,0 @@
#!/usr/bin/env python3
"""Helper script - Return CI variables such as stable cuda, min python version, etc."""
import argparse
import sys
def main(args: list[str]) -> None:
import generate_binary_build_matrix
parser = argparse.ArgumentParser()
parser.add_argument(
"--cuda-stable-version",
action="store_true",
help="get cuda stable version",
)
parser.add_argument(
"--min-python-version",
action="store_true",
help="get min supported python version",
)
options = parser.parse_args(args)
if options.cuda_stable_version:
return print(generate_binary_build_matrix.CUDA_STABLE)
if options.min_python_version:
return print(generate_binary_build_matrix.FULL_PYTHON_VERSIONS[0])
if __name__ == "__main__":
main(sys.argv[1:])

View File

@ -57,10 +57,10 @@ def gh_fetch_url_and_headers(
print(
f"""{url}
Rate limit exceeded:
Used: {err.headers["X-RateLimit-Used"]}
Limit: {err.headers["X-RateLimit-Limit"]}
Remaining: {err.headers["X-RateLimit-Remaining"]}
Resets at: {err.headers["x-RateLimit-Reset"]}"""
Used: {err.headers['X-RateLimit-Used']}
Limit: {err.headers['X-RateLimit-Limit']}
Remaining: {err.headers['X-RateLimit-Remaining']}
Resets at: {err.headers['x-RateLimit-Reset']}"""
)
else:
print(f"Error fetching {url} {err}")

View File

@ -63,9 +63,9 @@ def gh_get_labels(org: str, repo: str) -> list[str]:
update_labels(labels, info)
last_page = get_last_page_num_from_header(header)
assert last_page > 0, (
"Error reading header info to determine total number of pages of labels"
)
assert (
last_page > 0
), "Error reading header info to determine total number of pages of labels"
for page_number in range(2, last_page + 1): # skip page 1
_, info = request_for_labels(prefix + f"&page={page_number}")
update_labels(labels, info)

View File

@ -1,6 +1,11 @@
#!/usr/bin/env bash
set -ex
# The generic Linux job chooses to use base env, not the one setup by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
eval "$(command conda 'shell.bash' 'hook' 2> /dev/null)"
conda activate "${CONDA_ENV}"
# Use uv to speed up lintrunner init
python3 -m pip install uv==0.1.45

View File

@ -33,7 +33,7 @@ class PRIdentifier(str):
__slots__ = ()
def __new__(cls, value: str) -> "PRIdentifier":
md5 = hashlib.md5(value.encode("utf-8"), usedforsecurity=False).hexdigest()
md5 = hashlib.md5(value.encode("utf-8")).hexdigest()
return super().__new__(cls, md5)
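One detail in the changed line: the usedforsecurity=False variant (available since Python 3.9) marks the digest as non-cryptographic, which keeps FIPS-restricted interpreters from rejecting the md5 call. A standalone sketch:

import hashlib

def pr_identifier(value: str) -> str:
    # Deterministic, non-cryptographic identifier derived from the PR string.
    return hashlib.md5(value.encode("utf-8"), usedforsecurity=False).hexdigest()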

View File

@ -5,50 +5,6 @@ FROM --platform=linux/amd64 docker.io/ubuntu:24.04 as ld-prefix
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install ca-certificates libicu74 libssl3
# Patched podman
FROM --platform=linux/s390x docker.io/ubuntu:24.04 as podman
ENV DEBIAN_FRONTEND=noninteractive
RUN sed -i 's/^Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources
RUN apt-get update && \
apt-get install -y \
cmake \
curl \
devscripts \
dpkg-dev \
gdb \
less \
make \
python3 \
python3-pip \
quilt \
rsync \
software-properties-common \
stress-ng \
vim \
nano \
wget && \
apt-get build-dep -y podman && \
apt-get source podman
COPY podman-patches/podman-25245.patch /tmp/podman-25245.patch
COPY podman-patches/podman-25102-backport.patch /tmp/podman-25102-backport.patch
# import and apply patches
# patches:
# https://github.com/containers/podman/pull/25102
# https://github.com/containers/podman/pull/25245
RUN cd /libpod-* && \
quilt import /tmp/podman-25245.patch && quilt push && \
quilt import /tmp/podman-25102-backport.patch && quilt push && \
dch -i "Fix podman deadlock and add option to clean up build leftovers" && \
/bin/rm /tmp/podman-25245.patch /tmp/podman-25102-backport.patch
# build patched podman
RUN cd /libpod-* && \
debuild -i -us -uc -b && \
/bin/rm /podman-remote_*.deb && \
mkdir /tmp/podman && cp -v /podman*.deb /tmp/podman
# Main image.
FROM --platform=linux/s390x docker.io/ubuntu:24.04
@ -89,11 +45,7 @@ COPY fs/ /
RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint
# install podman
# RUN apt-get update && apt -y install podman podman-docker
# install patched podman
COPY --from=podman /tmp/podman /tmp/podman
RUN apt-get update && apt -y install /tmp/podman/*.deb && /bin/rm -rfv /tmp/podman
RUN apt -y install podman podman-docker
# amd64 Github Actions Runner.
RUN useradd -m actions-runner
@ -113,7 +65,7 @@ RUN virtualenv --system-site-packages venv
#
COPY --chown=actions-runner:actions-runner manywheel-s390x.tar /home/actions-runner/manywheel-s390x.tar
RUN curl -L https://github.com/actions/runner/releases/download/v2.322.0/actions-runner-linux-x64-2.322.0.tar.gz | tar -xz
RUN curl -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz | tar -xz
ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"]

View File

@ -1,358 +0,0 @@
diff --git a/cmd/podman/system/prune.go b/cmd/podman/system/prune.go
index f7cf7b551..739f87cde 100644
--- a/cmd/podman/system/prune.go
+++ b/cmd/podman/system/prune.go
@@ -48,6 +48,7 @@ func init() {
flags.BoolVarP(&force, "force", "f", false, "Do not prompt for confirmation. The default is false")
flags.BoolVarP(&pruneOptions.All, "all", "a", false, "Remove all unused data")
flags.BoolVar(&pruneOptions.External, "external", false, "Remove container data in storage not controlled by podman")
+ flags.BoolVar(&pruneOptions.Build, "build", false, "Remove build containers")
flags.BoolVar(&pruneOptions.Volume, "volumes", false, "Prune volumes")
filterFlagName := "filter"
flags.StringArrayVar(&filters, filterFlagName, []string{}, "Provide filter values (e.g. 'label=<key>=<value>')")
@@ -64,8 +65,12 @@ func prune(cmd *cobra.Command, args []string) error {
volumeString = `
- all volumes not used by at least one container`
}
-
- fmt.Printf(createPruneWarningMessage(pruneOptions), volumeString, "Are you sure you want to continue? [y/N] ")
+ buildString := ""
+ if pruneOptions.Build {
+ buildString = `
+ - all build containers`
+ }
+ fmt.Printf(createPruneWarningMessage(pruneOptions), volumeString, buildString, "Are you sure you want to continue? [y/N] ")
answer, err := reader.ReadString('\n')
if err != nil {
@@ -124,7 +129,7 @@ func createPruneWarningMessage(pruneOpts entities.SystemPruneOptions) string {
if pruneOpts.All {
return `WARNING! This command removes:
- all stopped containers
- - all networks not used by at least one container%s
+ - all networks not used by at least one container%s%s
- all images without at least one container associated with them
- all build cache
@@ -132,7 +137,7 @@ func createPruneWarningMessage(pruneOpts entities.SystemPruneOptions) string {
}
return `WARNING! This command removes:
- all stopped containers
- - all networks not used by at least one container%s
+ - all networks not used by at least one container%s%s
- all dangling images
- all dangling build cache
diff --git a/docs/source/markdown/podman-system-prune.1.md b/docs/source/markdown/podman-system-prune.1.md
index 52f9ec1c7..95099d018 100644
--- a/docs/source/markdown/podman-system-prune.1.md
+++ b/docs/source/markdown/podman-system-prune.1.md
@@ -7,20 +7,28 @@ podman\-system\-prune - Remove all unused pods, containers, images, networks, an
**podman system prune** [*options*]
## DESCRIPTION
-**podman system prune** removes all unused containers (both dangling and unreferenced), pods, networks, and optionally, volumes from local storage.
+**podman system prune** removes all unused containers (both dangling and unreferenced), build containers, pods, networks, and optionally, volumes from local storage.
Use the **--all** option to delete all unused images. Unused images are dangling images as well as any image that does not have any containers based on it.
By default, volumes are not removed to prevent important data from being deleted if there is currently no container using the volume. Use the **--volumes** flag when running the command to prune volumes as well.
+By default, build containers are not removed to prevent interference with builds in progress. Use the **--build** flag when running the command to remove build containers as well.
+
## OPTIONS
#### **--all**, **-a**
Recursively remove all unused pods, containers, images, networks, and volume data. (Maximum 50 iterations.)
+#### **--build**
+
+Removes any build containers that were created during the build, but were not removed because the build was unexpectedly terminated.
+
+Note: **This is not a safe operation and should be executed only when no builds are in progress. It can interfere with builds in progress.**
+
#### **--external**
-Removes all leftover container storage files from local storage not managed by Podman. In normal circumstances, no such data exists, but in case of an unclean shutdown, the Podman database may be corrupted and cause this.
+Tries to clean up remainders of previous containers or layers that are not referenced in the storage json files. These can happen in the case of unclean shutdowns or regular restarts in transient storage mode.
However, when using transient storage mode, the Podman database does not persist. This means containers leave the writable layers on disk after a reboot. When using a transient store, it is recommended that the **podman system prune --external** command is run during boot.
diff --git a/libpod/runtime.go b/libpod/runtime.go
index 986e40f60..609fbba57 100644
--- a/libpod/runtime.go
+++ b/libpod/runtime.go
@@ -33,6 +33,7 @@ import (
"github.com/containers/podman/v4/libpod/lock"
"github.com/containers/podman/v4/libpod/plugin"
"github.com/containers/podman/v4/libpod/shutdown"
+ "github.com/containers/podman/v4/pkg/domain/entities/reports"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/containers/podman/v4/pkg/systemd"
"github.com/containers/podman/v4/pkg/util"
@@ -1250,3 +1251,52 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) {
return toReturn, locksHeld, nil
}
+
+// Exists checks whether a file or directory exists at the given path.
+// If the path is a symlink, the symlink is followed.
+func Exists(path string) error {
+ // It uses unix.Faccessat which is a faster operation compared to os.Stat for
+ // simply checking the existence of a file.
+ err := unix.Faccessat(unix.AT_FDCWD, path, unix.F_OK, 0)
+ if err != nil {
+ return &os.PathError{Op: "faccessat", Path: path, Err: err}
+ }
+ return nil
+}
+
+// PruneBuildContainers removes any build containers that were created during the build,
+// but were not removed because the build was unexpectedly terminated.
+//
+// Note: This is not a safe operation and should be executed only when no builds are in progress. It can interfere with builds in progress.
+func (r *Runtime) PruneBuildContainers() ([]*reports.PruneReport, error) {
+ stageContainersPruneReports := []*reports.PruneReport{}
+
+ containers, err := r.store.Containers()
+ if err != nil {
+ return stageContainersPruneReports, err
+ }
+ for _, container := range containers {
+ path, err := r.store.ContainerDirectory(container.ID)
+ if err != nil {
+ return stageContainersPruneReports, err
+ }
+ if err := Exists(filepath.Join(path, "buildah.json")); err != nil {
+ continue
+ }
+
+ report := &reports.PruneReport{
+ Id: container.ID,
+ }
+ size, err := r.store.ContainerSize(container.ID)
+ if err != nil {
+ report.Err = err
+ }
+ report.Size = uint64(size)
+
+ if err := r.store.DeleteContainer(container.ID); err != nil {
+ report.Err = errors.Join(report.Err, err)
+ }
+ stageContainersPruneReports = append(stageContainersPruneReports, report)
+ }
+ return stageContainersPruneReports, nil
+}
diff --git a/pkg/api/handlers/libpod/system.go b/pkg/api/handlers/libpod/system.go
index 70d4493f8..7c129b1ba 100644
--- a/pkg/api/handlers/libpod/system.go
+++ b/pkg/api/handlers/libpod/system.go
@@ -22,6 +22,7 @@ func SystemPrune(w http.ResponseWriter, r *http.Request) {
All bool `schema:"all"`
Volumes bool `schema:"volumes"`
External bool `schema:"external"`
+ Build bool `schema:"build"`
}{}
if err := decoder.Decode(&query, r.URL.Query()); err != nil {
@@ -43,6 +44,7 @@ func SystemPrune(w http.ResponseWriter, r *http.Request) {
Volume: query.Volumes,
Filters: *filterMap,
External: query.External,
+ Build: query.Build,
}
report, err := containerEngine.SystemPrune(r.Context(), pruneOptions)
if err != nil {
diff --git a/pkg/bindings/system/types.go b/pkg/bindings/system/types.go
index 89e093f68..b4a4ff064 100644
--- a/pkg/bindings/system/types.go
+++ b/pkg/bindings/system/types.go
@@ -18,6 +18,7 @@ type PruneOptions struct {
Filters map[string][]string
Volumes *bool
External *bool
+ Build *bool
}
// VersionOptions are optional options for getting version info
diff --git a/pkg/bindings/system/types_prune_options.go b/pkg/bindings/system/types_prune_options.go
index d00498520..5f3bd652c 100644
--- a/pkg/bindings/system/types_prune_options.go
+++ b/pkg/bindings/system/types_prune_options.go
@@ -76,3 +76,18 @@ func (o *PruneOptions) GetExternal() bool {
}
return *o.External
}
+
+// WithBuild set field Build to given value
+func (o *PruneOptions) WithBuild(value bool) *PruneOptions {
+ o.Build = &value
+ return o
+}
+
+// GetBuild returns value of field Build
+func (o *PruneOptions) GetBuild() bool {
+ if o.Build == nil {
+ var z bool
+ return z
+ }
+ return *o.Build
+}
diff --git a/pkg/domain/entities/system.go b/pkg/domain/entities/system.go
index 473db3530..f6938652a 100644
--- a/pkg/domain/entities/system.go
+++ b/pkg/domain/entities/system.go
@@ -22,6 +22,7 @@ type SystemPruneOptions struct {
Volume bool
Filters map[string][]string `json:"filters" schema:"filters"`
External bool
+ Build bool
}
// SystemPruneReport provides report after system prune is executed.
diff --git a/pkg/domain/infra/abi/system.go b/pkg/domain/infra/abi/system.go
index 24ee64d29..ea3e5f203 100644
--- a/pkg/domain/infra/abi/system.go
+++ b/pkg/domain/infra/abi/system.go
@@ -150,16 +150,16 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
return nil
}
-// SystemPrune removes unused data from the system. Pruning pods, containers, networks, volumes and images.
+// SystemPrune removes unused data from the system. Pruning pods, containers, build container, networks, volumes and images.
func (ic *ContainerEngine) SystemPrune(ctx context.Context, options entities.SystemPruneOptions) (*entities.SystemPruneReport, error) {
var systemPruneReport = new(entities.SystemPruneReport)
if options.External {
- if options.All || options.Volume || len(options.Filters) > 0 {
+ if options.All || options.Volume || len(options.Filters) > 0 || options.Build {
return nil, fmt.Errorf("system prune --external cannot be combined with other options")
}
- err := ic.Libpod.GarbageCollect()
- if err != nil {
+
+ if err := ic.Libpod.GarbageCollect(); err != nil {
return nil, err
}
return systemPruneReport, nil
@@ -170,6 +170,17 @@ func (ic *ContainerEngine) SystemPrune(ctx context.Context, options entities.Sys
filters = append(filters, fmt.Sprintf("%s=%s", k, v[0]))
}
reclaimedSpace := (uint64)(0)
+
+ // Prune Build Containers
+ if options.Build {
+ stageContainersPruneReports, err := ic.Libpod.PruneBuildContainers()
+ if err != nil {
+ return nil, err
+ }
+ reclaimedSpace += reports.PruneReportsSize(stageContainersPruneReports)
+ systemPruneReport.ContainerPruneReports = append(systemPruneReport.ContainerPruneReports, stageContainersPruneReports...)
+ }
+
found := true
for found {
found = false
diff --git a/pkg/domain/infra/tunnel/system.go b/pkg/domain/infra/tunnel/system.go
index fc82e7b2b..142a9fa5c 100644
--- a/pkg/domain/infra/tunnel/system.go
+++ b/pkg/domain/infra/tunnel/system.go
@@ -19,7 +19,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
// SystemPrune prunes unused data from the system.
func (ic *ContainerEngine) SystemPrune(ctx context.Context, opts entities.SystemPruneOptions) (*entities.SystemPruneReport, error) {
- options := new(system.PruneOptions).WithAll(opts.All).WithVolumes(opts.Volume).WithFilters(opts.Filters).WithExternal(opts.External)
+ options := new(system.PruneOptions).WithAll(opts.All).WithVolumes(opts.Volume).WithFilters(opts.Filters).WithExternal(opts.External).WithBuild(opts.Build)
return system.Prune(ic.ClientCtx, options)
}
diff --git a/test/e2e/prune_test.go b/test/e2e/prune_test.go
index 01e848478..57bd5582d 100644
--- a/test/e2e/prune_test.go
+++ b/test/e2e/prune_test.go
@@ -4,6 +4,8 @@ import (
"fmt"
"os"
"path/filepath"
+ "syscall"
+ "time"
. "github.com/containers/podman/v4/test/utils"
. "github.com/onsi/ginkgo/v2"
@@ -22,6 +24,11 @@ FROM scratch
ENV test1=test1
ENV test2=test2`
+var longBuildImage = fmt.Sprintf(`
+FROM %s
+RUN echo "Hello, World!"
+RUN echo "Please use signal 9 this will never end" && sleep 10000s`, ALPINE)
+
var _ = Describe("Podman prune", func() {
It("podman container prune containers", func() {
@@ -593,4 +600,63 @@ var _ = Describe("Podman prune", func() {
Expect(err).ToNot(HaveOccurred())
Expect(dirents).To(HaveLen(3))
})
+
+ It("podman system prune --build clean up after terminated build", func() {
+ useCustomNetworkDir(podmanTest, tempdir)
+
+ podmanTest.BuildImage(pruneImage, "alpine_notleaker:latest", "false")
+
+ create := podmanTest.Podman([]string{"create", "--name", "test", BB, "sleep", "10000"})
+ create.WaitWithDefaultTimeout()
+ Expect(create).Should(ExitCleanly())
+
+ containerFilePath := filepath.Join(podmanTest.TempDir, "ContainerFile-podman-leaker")
+ err := os.WriteFile(containerFilePath, []byte(longBuildImage), 0755)
+ Expect(err).ToNot(HaveOccurred())
+
+ build := podmanTest.Podman([]string{"build", "-f", containerFilePath, "-t", "podmanleaker"})
+ // The build never finishes, so wait for the sentinel line and then send SIGKILL to simulate a failed build that leaves stage containers behind.
+ matchedOutput := false
+ for range 900 {
+ if build.LineInOutputContains("Please use signal 9") {
+ matchedOutput = true
+ build.Signal(syscall.SIGKILL)
+ break
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+ if !matchedOutput {
+ Fail("Did not match special string in podman build")
+ }
+
+ // Check for the intermediate image of the stage container
+ none := podmanTest.Podman([]string{"images", "-a"})
+ none.WaitWithDefaultTimeout()
+ Expect(none).Should(ExitCleanly())
+ Expect(none.OutputToString()).Should(ContainSubstring("none"))
+
+ // Check that the container and the stage containers exist
+ count := podmanTest.Podman([]string{"ps", "-aq", "--external"})
+ count.WaitWithDefaultTimeout()
+ Expect(count).Should(ExitCleanly())
+ Expect(count.OutputToStringArray()).To(HaveLen(3))
+
+ prune := podmanTest.Podman([]string{"system", "prune", "--build", "-f"})
+ prune.WaitWithDefaultTimeout()
+ Expect(prune).Should(ExitCleanly())
+
+ // Container should still exist, but no stage containers
+ count = podmanTest.Podman([]string{"ps", "-aq", "--external"})
+ count.WaitWithDefaultTimeout()
+ Expect(count).Should(ExitCleanly())
+ Expect(count.OutputToString()).To(BeEmpty())
+
+ Expect(podmanTest.NumberOfContainers()).To(Equal(0))
+
+ after := podmanTest.Podman([]string{"images", "-a"})
+ after.WaitWithDefaultTimeout()
+ Expect(after).Should(ExitCleanly())
+ Expect(after.OutputToString()).ShouldNot(ContainSubstring("none"))
+ Expect(after.OutputToString()).Should(ContainSubstring("notleaker"))
+ })
})
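The core trick in this test — watch a long-running build's output for a sentinel line, then SIGKILL it so its stage containers are left behind — carries over to other harnesses. A rough Python sketch of the same wait-then-kill pattern (the shell command is a stand-in, not the real `podman build` invocation):

```python
import signal
import subprocess

# Stand-in for a build that prints a sentinel and then hangs.
proc = subprocess.Popen(
    ["sh", "-c", 'echo "Please use signal 9"; sleep 10000'],
    stdout=subprocess.PIPE,
    text=True,
)

matched = False
for line in proc.stdout:  # blocks per line, like the 100ms polling loop above
    if "Please use signal 9" in line:
        matched = True
        # SIGKILL gives the process no chance to clean up, simulating a
        # failed build that leaks its intermediate state.
        proc.send_signal(signal.SIGKILL)
        break

assert matched, "did not see the sentinel line in the build output"
proc.wait()
```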

View File

@@ -1,21 +0,0 @@
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
index 4f71d49e5c..3d74af6a6c 100644
--- a/pkg/rootless/rootless_linux.c
+++ b/pkg/rootless/rootless_linux.c
@@ -658,7 +658,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
if (pipe (p) < 0)
return -1;
- pid = fork ();
+ pid = syscall_clone (SIGCHLD, NULL);
if (pid < 0)
{
close (p[0]);
@@ -689,7 +689,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
close (p[0]);
setsid ();
- pid = fork ();
+ pid = syscall_clone (SIGCHLD, NULL);
if (pid < 0)
_exit (EXIT_FAILURE);

View File

@@ -1,97 +0,0 @@
#!/usr/bin/env python3
import os
import re
import sys
sys.path.insert(1, os.path.join(sys.path[0], "..", "..", ".."))
from tools.testing.discover_tests import TESTS
skip_list = [
# these tests fail due to various reasons
"dynamo/test_misc",
"inductor/test_aot_inductor",
"inductor/test_cpu_repro",
"inductor/test_cpu_select_algorithm",
"inductor/test_aot_inductor_arrayref",
"inductor/test_torchinductor_codegen_dynamic_shapes",
"lazy/test_meta_kernel",
"onnx/test_utility_funs",
"profiler/test_profiler",
"test_ao_sparsity",
"test_cpp_extensions_open_device_registration",
"test_jit",
"test_metal",
"test_mps",
"dynamo/test_torchrec",
"inductor/test_aot_inductor_utils",
"inductor/test_coordinate_descent_tuner",
"test_jiterator",
# these tests run long and fail in addition to that
"dynamo/test_dynamic_shapes",
"test_quantization",
"inductor/test_torchinductor",
"inductor/test_torchinductor_dynamic_shapes",
"inductor/test_torchinductor_opinfo",
"test_binary_ufuncs",
"test_unary_ufuncs",
# these tests fail when cuda is not available
"inductor/test_cudacodecache",
"inductor/test_inductor_utils",
"inductor/test_inplacing_pass",
"inductor/test_kernel_benchmark",
"inductor/test_max_autotune",
"inductor/test_move_constructors_to_cuda",
"inductor/test_multi_kernel",
"inductor/test_pattern_matcher",
"inductor/test_perf",
"inductor/test_select_algorithm",
"inductor/test_snode_runtime",
"inductor/test_triton_wrapper",
# these tests fail when mkldnn is not available
"inductor/test_custom_post_grad_passes",
"inductor/test_mkldnn_pattern_matcher",
# lacks quantization support
"onnx/test_models_quantized_onnxruntime",
"onnx/test_pytorch_onnx_onnxruntime",
# https://github.com/pytorch/pytorch/issues/102078
"test_decomp",
# https://github.com/pytorch/pytorch/issues/146698
"test_model_exports_to_core_aten",
# runs very long, skip for now
"inductor/test_layout_optim",
"test_fx",
# some false errors
"doctests",
]
skip_list_regex = [
# distributed tests fail randomly
"distributed/.*",
]
all_testfiles = sorted(TESTS)
filtered_testfiles = []
for filename in all_testfiles:
if filename in skip_list:
continue
regex_filtered = False
for regex_string in skip_list_regex:
if re.fullmatch(regex_string, filename):
regex_filtered = True
break
if regex_filtered:
continue
filtered_testfiles.append(filename)
for filename in filtered_testfiles:
print(' "' + filename + '",')

View File

@@ -485,7 +485,7 @@ def get_check_run_name_prefix(workflow_run: Any) -> str:
    if workflow_run is None:
        return ""
    else:
-        return f"{workflow_run['workflow']['name']} / "
+        return f'{workflow_run["workflow"]["name"]} / '
def is_passing_status(status: Optional[str]) -> bool:
@@ -545,7 +545,7 @@ def add_workflow_conclusions(
        if not isinstance(checkrun_node, dict):
            warn(f"Expected dictionary, but got {type(checkrun_node)}")
            continue
-        checkrun_name = f"{get_check_run_name_prefix(workflow_run)}{checkrun_node['name']}"
+        checkrun_name = f'{get_check_run_name_prefix(workflow_run)}{checkrun_node["name"]}'
existing_checkrun = workflow_obj.jobs.get(checkrun_name)
if existing_checkrun is None or not is_passing_status(
existing_checkrun.status
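Both hunks in this file only flip which quote style wraps the f-string; the two spellings produce identical strings, so this is a formatting revert rather than a behavior change. A quick check:

```python
workflow_run = {"workflow": {"name": "pull"}}

# Double quotes outside with single-quoted keys, or single quotes outside
# with double-quoted keys: the rendered prefix is the same either way.
assert (
    f"{workflow_run['workflow']['name']} / "
    == f'{workflow_run["workflow"]["name"]} / '
    == "pull / "
)
```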
@@ -819,9 +819,10 @@ class GitHubPR:
                cursor=info["reviews"]["pageInfo"]["startCursor"],
            )
            info = rc["data"]["repository"]["pullRequest"]
-        reviews = {
-            author: state for author, state in self._reviews if state != "COMMENTED"
-        }
+        reviews = {}
+        for author, state in self._reviews:
+            if state != "COMMENTED":
+                reviews[author] = state
return list(reviews.items())
def get_approved_by(self) -> list[str]:
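The dict comprehension and the explicit loop above are behaviorally identical: iterating reviews in order and writing into a dict keeps the last non-COMMENTED state per author. A small demonstration with made-up data:

```python
reviews_in_order = [
    ("alice", "APPROVED"),
    ("bob", "COMMENTED"),
    ("alice", "CHANGES_REQUESTED"),
]

as_comprehension = {
    author: state for author, state in reviews_in_order if state != "COMMENTED"
}

as_loop = {}
for author, state in reviews_in_order:
    if state != "COMMENTED":
        as_loop[author] = state

# Later entries overwrite earlier ones, so alice's final state wins and
# bob's comment-only review is dropped entirely.
assert as_comprehension == as_loop == {"alice": "CHANGES_REQUESTED"}
```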
@@ -1223,17 +1224,9 @@
if not self.is_ghstack_pr():
msg = self.gen_commit_message()
pr_branch_name = f"__pull-request-{self.pr_num}__init__"
-            repo.fetch(self.last_commit()["oid"], pr_branch_name)
+            repo.fetch(f"pull/{self.pr_num}/head", pr_branch_name)
repo._run_git("merge", "--squash", pr_branch_name)
repo._run_git("commit", f'--author="{self.get_author()}"', "-m", msg)
-            # Did the PR change since we started the merge?
-            pulled_sha = repo.show_ref(pr_branch_name)
-            latest_pr_status = GitHubPR(self.org, self.project, self.pr_num)
-            if pulled_sha != latest_pr_status.last_commit()["oid"]:
-                raise RuntimeError(
-                    "PR has been updated since CI checks last passed. Please rerun the merge command."
-                )
return []
else:
return self.merge_ghstack_into(
@@ -1514,36 +1507,6 @@ def checks_to_markdown_bullets(
]
-def post_starting_merge_comment(
-    repo: GitRepo,
-    pr: GitHubPR,
-    explainer: TryMergeExplainer,
-    dry_run: bool,
-    ignore_current_checks_info: Optional[
-        list[tuple[str, Optional[str], Optional[int]]]
-    ] = None,
-) -> None:
-    """Post the initial merge starting message on the PR. Also post a short
-    message on all PRs in the stack."""
-    gh_post_pr_comment(
-        pr.org,
-        pr.project,
-        pr.pr_num,
-        explainer.get_merge_message(ignore_current_checks_info),
-        dry_run=dry_run,
-    )
-    if pr.is_ghstack_pr():
-        for additional_prs, _ in get_ghstack_prs(repo, pr):
-            if additional_prs.pr_num != pr.pr_num:
-                gh_post_pr_comment(
-                    additional_prs.org,
-                    additional_prs.project,
-                    additional_prs.pr_num,
-                    f"Starting merge as part of PR stack under #{pr.pr_num}",
-                    dry_run=dry_run,
-                )
def manually_close_merged_pr(
pr: GitHubPR,
additional_merged_prs: list[GitHubPR],
@@ -2167,7 +2130,13 @@ def merge(
check_for_sev(pr.org, pr.project, skip_mandatory_checks)
if skip_mandatory_checks:
-        post_starting_merge_comment(repo, pr, explainer, dry_run)
+        gh_post_pr_comment(
+            pr.org,
+            pr.project,
+            pr.pr_num,
+            explainer.get_merge_message(),
+            dry_run=dry_run,
+        )
return pr.merge_into(
repo,
dry_run=dry_run,
@@ -2190,12 +2159,12 @@
)
ignore_current_checks_info = failing
-        post_starting_merge_comment(
-            repo,
-            pr,
-            explainer,
-            dry_run,
-            ignore_current_checks_info=ignore_current_checks_info,
+        gh_post_pr_comment(
+            pr.org,
+            pr.project,
+            pr.pr_num,
+            explainer.get_merge_message(ignore_current_checks_info),
+            dry_run=dry_run,
)
start_time = time.time()
@@ -2281,8 +2250,7 @@
except MandatoryChecksMissingError as ex:
last_exception = str(ex)
print(
f"Merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} failed due to: {ex}. Retrying in 5 min",
flush=True,
f"Merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} failed due to: {ex}. Retrying in 5 min"
)
time.sleep(5 * 60)
# Finally report timeout back
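The dropped `flush=True` is worth a note: in CI, stdout is a pipe and therefore block-buffered, so without an explicit flush (or running under `python -u`) the retry notice can sit in the buffer for the entire five-minute sleep. A condensed illustration:

```python
import time

for attempt in range(3):
    # When stdout is a pipe (as in CI logs), this line may not appear
    # until the buffer fills unless it is flushed explicitly.
    print(f"attempt {attempt} failed, retrying in 5 min", flush=True)
    time.sleep(1)  # shortened stand-in for the real 5-minute backoff
```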

View File

@@ -79,7 +79,7 @@ class TryMergeExplainer:
            (
                "<details><summary>Advanced Debugging</summary>",
                "Check the merge workflow status ",
-                f'<a href="{os.getenv("GH_RUN_URL")}">here</a>',
+                f"<a href=\"{os.getenv('GH_RUN_URL')}\">here</a>",
"</details>",
)
)

View File

@@ -1,17 +0,0 @@
@echo on
set PYTHON_PREFIX=%PY_VERS:.=%
set PYTHON_PREFIX=py%PYTHON_PREFIX:;=;py%
call .ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat
:: Create a new conda environment
if "%PY_VERS%" == "3.13t" (
call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python-freethreading python=3.13
) else (
call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python=%PY_VERS%
)
call conda run -n %PYTHON_PREFIX% pip install wheel pybind11 certifi cython cmake setuptools==72.1.0 ninja
dir "%VC_INSTALL_PATH%"
call "%VC_INSTALL_PATH%\VC\Auxiliary\Build\vcvarsall.bat" x64
call conda run -n %PYTHON_PREFIX% python .github/scripts/build_triton_wheel.py --device=%BUILD_DEVICE% %RELEASE%

View File

@@ -1,35 +0,0 @@
#Requires -RunAsAdministrator
# Enable long paths on Windows
Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1
$VC_VERSION_major = [int] ${env:VC_VERSION}.split(".")[0]
$VC_DOWNLOAD_LINK = "https://aka.ms/vs/$VC_VERSION_major/release/vs_BuildTools.exe"
$VC_INSTALL_ARGS = @("--nocache","--quiet","--norestart","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
"--add Microsoft.Component.MSBuild",
"--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
"--add Microsoft.VisualStudio.Component.TextTemplating",
"--add Microsoft.VisualStudio.Component.VC.CoreBuildTools",
"--add Microsoft.VisualStudio.Component.VC.CoreIde",
"--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
"--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
"--add Microsoft.VisualStudio.Component.Windows11SDK.22621")
echo "Downloading Visual Studio installer from $VC_DOWNLOAD_LINK."
curl.exe --retry 3 -kL $VC_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS ${env:VC_YEAR} Version ${env:VC_VERSION} installer failed"
exit 1
}
$InstallationPath = ${env:VC_INSTALL_PATH}
$VC_INSTALL_ARGS = "--installPath `"$InstallationPath`"" + " " + $VC_INSTALL_ARGS
echo "Installing Visual Studio version ${env:VC_VERSION} in $InstallationPath."
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VC_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS ${env:VC_YEAR} installer exited with code $exitCode, which should be one of [0, 3010]."
exit 1
}
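The deleted installer script accepts exit code 3010 alongside 0; on Windows, 3010 is ERROR_SUCCESS_REBOOT_REQUIRED, meaning the install succeeded but needs a reboot. A tiny Python analogue of that check (the command is a placeholder, not the real installer invocation):

```python
import subprocess

ACCEPTABLE = {0, 3010}  # 3010 = ERROR_SUCCESS_REBOOT_REQUIRED

result = subprocess.run(["true"])  # placeholder for vs_installer.exe
if result.returncode not in ACCEPTABLE:
    raise SystemExit(f"installer exited with unexpected code {result.returncode}")
```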

View File

@@ -4,7 +4,6 @@
{%- set download_artifact_action = "actions/download-artifact@v4.1.7" -%}
{%- set timeout_minutes = 240 -%}
-{%- set timeout_minutes_windows_binary = 300 -%}
{%- macro concurrency(build_environment) -%}
concurrency:

View File

@@ -111,10 +111,7 @@ jobs:
      ALPINE_IMAGE: "docker.io/s390x/alpine"
{%- elif config["gpu_arch_type"] == "rocm" %}
      runs_on: linux.rocm.gpu
-{%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] == "12.8" %}
-      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner
-{%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] != "12.8"%}
+{%- elif config["gpu_arch_type"] == "cuda" %}
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
{%- else %}

View File

@@ -25,6 +25,9 @@
      DOCKER_IMAGE: !{{ config["container_image"] }}
{%- endif %}
{%- if config["package_type"] == "manywheel" %}
+{%- if config["devtoolset"] %}
+      DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
+{%- endif %}
{%- if config.use_split_build is defined %}
use_split_build: !{{ config["use_split_build"] }}
{%- endif %}
@@ -34,6 +37,9 @@
      LIBTORCH_CONFIG: !{{ config["libtorch_config"] }}
{%- endif %}
      LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
+{%- if config["devtoolset"] %}
+      DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
+{%- endif %}
{%- if is_windows %}
# This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason

View File

@@ -1,197 +0,0 @@
{% import 'common.yml.j2' as common %}
{% import 'upload.yml.j2' as upload %}
{%- block name -%}
# Template is at: .github/templates/windows_arm64_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: !{{ build_environment }}
{%- endblock %}
{%- macro set_runner_specific_vars() -%}
# NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the
# runner.temp variable, which we need.
- name: Populate binary env
shell: cmd
run: |
echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
{%- endmacro %}
on:
push:
branches:
- !{{ branches }}
{%- if branches == "nightly" %}
tags:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
{%- endif %}
{%- for label in ciflow_config.labels | sort %}
{%- if loop.first and branches != "nightly" %}
tags:
{%- endif %}
- '!{{ label }}/*'
{%- endfor %}
workflow_dispatch:
env:
BUILD_ENVIRONMENT: !{{ build_environment }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
PYTORCH_ROOT: /pytorch
DOWNLOADS_DIR: c:\temp\downloads
DEPENDENCIES_DIR: c:\temp\dependencies
ENABLE_APL: 1
ENABLE_OPENBLAS: 0
MSVC_VERSION : 14.42
AWS_DEFAULT_REGION: us-east-1
jobs:
get-label-type:
if: github.repository_owner == 'pytorch'
name: get-label-type
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
{%- for config in build_configs %}
!{{ config["build_name"] }}-build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: get-label-type
runs-on: "windows-11-arm64"
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
{%- endif %}
steps:
!{{ set_runner_specific_vars() }}
- name: Bootstrap folders
shell: cmd
run: |
mkdir "%NIGHTLIES_PYTORCH_ROOT%"
mkdir "%PYTORCH_FINAL_PACKAGE_DIR%"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
- name: Bootstrap Build Tools
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
- name: Bootstrap Git
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
- name: Remove Pytorch folder
shell: cmd
run: |
rmdir /s /q "pytorch"
- name: Git checkout PyTorch - recursive
uses: actions/checkout@v4
with:
path: "pytorch"
submodules: recursive
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Bootstrap APL
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
- name: Bootstrap sccache
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat"
- name: Bootstrap Libuv
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat"
- name: Populate binary env
shell: bash
run: |
"pytorch/.circleci/scripts/binary_populate_env.sh"
- name: Build PyTorch binary
shell: bash
run: |
"pytorch/.circleci/scripts/binary_windows_arm64_build.sh"
- uses: !{{ common.upload_artifact_action }}
if: always()
with:
name: !{{ config["build_name"] }}
retention-days: 14
if-no-files-found: error
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
!{{ config["build_name"] }}-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- !{{ config["build_name"] }}-build
- get-label-type
runs-on: "windows-11-arm64"
timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
steps:
!{{ set_runner_specific_vars() }}
- uses: !{{ common.download_artifact_action }}
name: Download Build Artifacts
with:
name: !{{ config["build_name"] }}
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
- name: Bootstrap Git
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
- name: Remove Pytorch folder
shell: cmd
run: |
rmdir /s /q "pytorch"
- name: Git checkout PyTorch
uses: actions/checkout@v4
with:
path: "pytorch"
submodules: recursive
- name: Bootstrap APL
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
- name: Bootstrap Python
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
- name: Bootstrap Build Tools
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
- name: Bootstrap Rust
shell: cmd
run: |
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
- name: Populate binary env
shell: bash
run: |
"pytorch/.circleci/scripts/binary_populate_env.sh"
- name: Test PyTorch binary
shell: bash
run: |
"pytorch/.circleci/scripts/binary_windows_arm64_test.sh"
{%- if branches == "nightly" %}
!{{ upload.upload_binaries(config, True) }}
{%- endif %}
{%- endfor %}

View File

@@ -71,7 +71,7 @@ jobs:
{%- else %}
    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
{%- endif %}
-    timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
+    timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
@@ -107,14 +107,10 @@ jobs:
{%- else %}
    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge.nonephemeral"
{%- endif %}
{%- else %}
-{%- if branches == "nightly" %}
-    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
-{%- else %}
    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
-{%- endif %}
{%- endif %}
-    timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
+    timeout-minutes: !{{ common.timeout_minutes }}
!{{ upload.binary_env(config, True) }}
steps:
!{{ common.setup_ec2_windows() }}

View File

@@ -18,7 +18,7 @@ on:
      description: prefix for runner label
    runs_on:
      required: false
-      default: linux.12xlarge.memory.ephemeral
+      default: linux.12xlarge.ephemeral
type: string
description: Hardware to run this "build" job on, linux.12xlarge or linux.arm64.2xlarge.
timeout-minutes:
@@ -70,6 +70,10 @@ on:
      required: false
      type: string
      description: Desired libtorch variant (for libtorch builds only)
+    DESIRED_DEVTOOLSET:
+      required: false
+      type: string
+      description: Desired dev toolset
DESIRED_PYTHON:
required: false
type: string
@@ -100,6 +104,7 @@ jobs:
      SKIP_ALL_TESTS: 1
      LIBTORCH_CONFIG: ${{ inputs.LIBTORCH_CONFIG }}
      LIBTORCH_VARIANT: ${{ inputs.LIBTORCH_VARIANT }}
+      DESIRED_DEVTOOLSET: ${{ inputs.DESIRED_DEVTOOLSET }}
DESIRED_PYTHON: ${{ inputs.DESIRED_PYTHON }}
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: ${{ inputs.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}
ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
@@ -125,6 +130,7 @@ jobs:
          echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
          echo "LIBTORCH_CONFIG=${{ env.LIBTORCH_CONFIG }}"
          echo "LIBTORCH_VARIANT=${{ env.LIBTORCH_VARIANT }}"
+          echo "DESIRED_DEVTOOLSET=${{ env.DESIRED_DEVTOOLSET }}"
echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
echo "PYTORCH_EXTRA_INSTALL_REQUIREMENTS=${{ env.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}"
echo "ALPINE_IMAGE=${{ env.ALPINE_IMAGE }}"
@@ -218,6 +224,7 @@ jobs:
            -e BINARY_ENV_FILE \
            -e BUILD_ENVIRONMENT \
            -e DESIRED_CUDA \
+            -e DESIRED_DEVTOOLSET \
-e DESIRED_PYTHON \
-e GITHUB_ACTIONS \
-e GPU_ARCH_TYPE \

View File

@@ -47,6 +47,10 @@ on:
      required: false
      type: string
      description: Desired libtorch variant (for libtorch builds only)
+    DESIRED_DEVTOOLSET:
+      required: false
+      type: string
+      description: Desired dev toolset
DESIRED_PYTHON:
required: false
type: string
@@ -88,6 +92,7 @@ jobs:
      SKIP_ALL_TESTS: 1
      LIBTORCH_CONFIG: ${{ inputs.LIBTORCH_CONFIG }}
      LIBTORCH_VARIANT: ${{ inputs.LIBTORCH_VARIANT }}
+      DESIRED_DEVTOOLSET: ${{ inputs.DESIRED_DEVTOOLSET }}
DESIRED_PYTHON: ${{ inputs.DESIRED_PYTHON }}
ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
AWS_DEFAULT_REGION: us-east-1
@@ -113,6 +118,7 @@ jobs:
          echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
          echo "LIBTORCH_CONFIG=${{ env.LIBTORCH_CONFIG }}"
          echo "LIBTORCH_VARIANT=${{ env.LIBTORCH_VARIANT }}"
+          echo "DESIRED_DEVTOOLSET=${{ env.DESIRED_DEVTOOLSET }}"
echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
echo "ALPINE_IMAGE=${{ env.ALPINE_IMAGE }}"

View File

@@ -43,6 +43,10 @@ on:
      required: false
      type: string
      description: Desired libtorch variant (for libtorch builds only)
+    DESIRED_DEVTOOLSET:
+      required: false
+      type: string
+      description: Desired dev toolset
DESIRED_PYTHON:
required: false
type: string
@@ -62,6 +66,7 @@ on:
jobs:
  upload:
    runs-on: ubuntu-22.04
+    environment: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v'))) && 'conda-aws-upload' || '' }}
container:
image: continuumio/miniconda3:4.12.0
env:
@@ -76,6 +81,7 @@
      SKIP_ALL_TESTS: 1
      LIBTORCH_CONFIG: ${{ inputs.LIBTORCH_CONFIG }}
      LIBTORCH_VARIANT: ${{ inputs.LIBTORCH_VARIANT }}
+      DESIRED_DEVTOOLSET: ${{ inputs.DESIRED_DEVTOOLSET }}
DESIRED_PYTHON: ${{ inputs.DESIRED_PYTHON }}
BINARY_ENV_FILE: /tmp/env
GITHUB_TOKEN: ${{ secrets.github-token }}

View File

@@ -69,11 +69,13 @@ on:
      required: false
      type: string
      default: ""
-    max-jobs:
+    use_split_build:
      description: |
-        Overwrite the number of jobs to use for the build
+        [Experimental] Build a libtorch only wheel and build pytorch such that
+        are built from the libtorch wheel.
      required: false
-      type: string
+      type: boolean
+      default: false
secrets:
HUGGING_FACE_HUB_TOKEN:
@@ -208,7 +210,7 @@
          OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-          MAX_JOBS_OVERRIDE: ${{ inputs.max-jobs }}
+          USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
run: |
START_TIME=$(date +%s)
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
@@ -228,12 +230,6 @@
            DOCKER_SHELL_CMD=
          fi
-          if [[ ${MAX_JOBS_OVERRIDE} == "" ]]; then
-            MAX_JOBS="$(nproc --ignore=2)"
-          else
-            MAX_JOBS="${MAX_JOBS_OVERRIDE}"
-          fi
# Leaving 1GB for the runner and other things
TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
# https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
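With the override logic removed, MAX_JOBS always falls back to `nproc --ignore=2`, as the next hunk shows. A rough Python equivalent of that default (illustrative only):

```python
import os

# `nproc --ignore=2`: all CPUs minus two, keeping headroom for the runner
# agent and other host processes; never drop below one build job.
max_jobs = max(1, (os.cpu_count() or 3) - 2)
print(max_jobs)
```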
@@ -245,8 +241,7 @@
          # shellcheck disable=SC2086
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
-            -e MAX_JOBS=${MAX_JOBS} \
-            -e MAX_JOBS_OVERRIDE \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
-e AWS_DEFAULT_REGION \
-e PR_NUMBER \
-e SHA1 \
@@ -287,7 +282,7 @@
      - name: Store PyTorch Build Artifacts on S3
        uses: seemethere/upload-artifact-s3@v5
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}
retention-days: 14
@@ -295,15 +290,34 @@
path: artifacts.zip
s3-bucket: ${{ inputs.s3-bucket }}
+      - name: Store PyTorch Build Artifacts on S3 for split build
+        uses: seemethere/upload-artifact-s3@v5
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        with:
+          name: ${{ inputs.build-environment }}-experimental-split-build
+          retention-days: 14
+          if-no-files-found: error
+          path: artifacts.zip
+          s3-bucket: ${{ inputs.s3-bucket }}
- name: Store PyTorch Build Artifacts for s390x
uses: actions/upload-artifact@v4
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.build-environment == 'linux-s390x-binary-manywheel'
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}
retention-days: 14
if-no-files-found: error
path: artifacts.zip
+      - name: Store PyTorch Build Artifacts for s390x for split build
+        uses: actions/upload-artifact@v4
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
+        with:
+          name: ${{ inputs.build-environment }}-experimental-split-build
+          retention-days: 14
+          if-no-files-found: error
+          path: artifacts.zip
- name: Upload sccache stats
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
uses: ./.github/actions/upload-sccache-stats

View File

@@ -33,6 +33,10 @@ on:
        default: "3.9"
        description: |
          The python version to be used. Will be 3.9 by default
+    environment-file:
+      required: false
+      type: string
+      description: Set the conda environment file used to setup macOS build.
test-matrix:
required: false
type: string
@@ -82,12 +86,23 @@ jobs:
fi
- name: Setup miniconda
+        if: inputs.environment-file == ''
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt
+      # This option is used when cross-compiling arm64 from x86-64. Specifically, we need arm64 conda
+      # environment even though the arch is x86-64
+      - name: Setup miniconda using the provided environment file
+        if: inputs.environment-file != ''
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        with:
+          python-version: ${{ inputs.python-version }}
+          environment-file: ${{ inputs.environment-file }}
+          pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
uses: nick-fields/retry@v3.0.0
if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}

View File

@@ -251,11 +251,6 @@ jobs:
          # copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
          docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
-      - name: Change permissions (only needed for MI300 runners for now)
-        if: ${{ always() && steps.test.conclusion && contains(matrix.runner, 'mi300') }}
-        run: |
-          docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "sudo chown -R 1001:1001 test"
- name: Print remaining test logs
shell: bash
if: always() && steps.test.conclusion

View File

@@ -62,12 +62,7 @@ jobs:
        if: cancelled()
        shell: bash
        run: |
-          # If podman build command is interrupted,
+          # if podman build command is interrupted,
          # it can leave a couple of processes still running.
-          # Order them to stop for clean shutdown.
-          # It looks like sometimes some processes remain
-          # after first cleanup.
-          # Wait a bit and do cleanup again. It looks like it helps.
-          docker system prune --build -f || true
-          sleep 60
+          # order them to stop for clean shutdown.
docker system prune --build -f || true

Some files were not shown because too many files have changed in this diff.