mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-01 22:14:53 +08:00
Compare commits
1 Commits
zhxchen17/
...
yguo/patch
| Author | SHA1 | Date | |
|---|---|---|---|
| 2d757f6517 |
@ -20,7 +20,7 @@ cd /
|
||||
# on the mounted pytorch repo
|
||||
git config --global --add safe.directory /pytorch
|
||||
pip install -r /pytorch/requirements.txt
|
||||
pip install auditwheel==6.2.0
|
||||
pip install auditwheel
|
||||
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
||||
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
||||
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
||||
|
||||
@ -39,7 +39,7 @@ def build_ArmComputeLibrary() -> None:
|
||||
"clone",
|
||||
"https://github.com/ARM-software/ComputeLibrary.git",
|
||||
"-b",
|
||||
"v25.02",
|
||||
"v24.09",
|
||||
"--depth",
|
||||
"1",
|
||||
"--shallow-submodules",
|
||||
@ -99,14 +99,10 @@ def update_wheel(wheel_path, desired_cuda) -> None:
|
||||
if "126" in desired_cuda:
|
||||
libs_to_copy += [
|
||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6",
|
||||
"/usr/local/cuda/lib64/libcufile.so.0",
|
||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
|
||||
]
|
||||
elif "128" in desired_cuda:
|
||||
libs_to_copy += [
|
||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
|
||||
"/usr/local/cuda/lib64/libcufile.so.0",
|
||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
|
||||
]
|
||||
else:
|
||||
libs_to_copy += [
|
||||
@ -208,7 +204,7 @@ if __name__ == "__main__":
|
||||
else:
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
|
||||
elif branch.startswith(("v1.", "v2.")):
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
|
||||
|
||||
if enable_mkldnn:
|
||||
build_ArmComputeLibrary()
|
||||
|
||||
@ -19,11 +19,13 @@ import boto3
|
||||
|
||||
# AMI images for us-east-1, change the following based on your ~/.aws/config
|
||||
os_amis = {
|
||||
"ubuntu18_04": "ami-078eece1d8119409f", # login_name: ubuntu
|
||||
"ubuntu20_04": "ami-052eac90edaa9d08f", # login_name: ubuntu
|
||||
"ubuntu22_04": "ami-0c6c29c5125214c77", # login_name: ubuntu
|
||||
"redhat8": "ami-0698b90665a2ddcf1", # login_name: ec2-user
|
||||
}
|
||||
|
||||
ubuntu18_04_ami = os_amis["ubuntu18_04"]
|
||||
ubuntu20_04_ami = os_amis["ubuntu20_04"]
|
||||
|
||||
|
||||
@ -327,7 +329,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None
|
||||
]
|
||||
)
|
||||
host.run_cmd(
|
||||
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}"
|
||||
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.09 {git_clone_flags}"
|
||||
)
|
||||
|
||||
host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
|
||||
@ -657,6 +659,18 @@ def configure_system(
|
||||
"sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip"
|
||||
)
|
||||
host.run_cmd("pip3 install dataclasses typing-extensions")
|
||||
# Install and switch to gcc-8 on Ubuntu-18.04
|
||||
if not host.using_docker() and host.ami == ubuntu18_04_ami and compiler == "gcc-8":
|
||||
host.run_cmd("sudo apt-get install -y g++-8 gfortran-8")
|
||||
host.run_cmd(
|
||||
"sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100"
|
||||
)
|
||||
host.run_cmd(
|
||||
"sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100"
|
||||
)
|
||||
host.run_cmd(
|
||||
"sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 100"
|
||||
)
|
||||
if not use_conda:
|
||||
print("Installing Cython + numpy from PyPy")
|
||||
host.run_cmd("sudo pip3 install Cython")
|
||||
@ -747,7 +761,7 @@ def start_build(
|
||||
version = host.check_output("cat pytorch/version.txt").strip()[:-2]
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
|
||||
if branch.startswith(("v1.", "v2.")):
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
|
||||
if host.using_docker():
|
||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||
if enable_mkldnn:
|
||||
|
||||
@ -1,8 +1,4 @@
|
||||
#!/bin/bash
|
||||
# The purpose of this script is to:
|
||||
# 1. Extract the set of parameters to be used for a docker build based on the provided image name.
|
||||
# 2. Run docker build with the parameters found in step 1.
|
||||
# 3. Run the built image and print out the expected and actual versions of packages installed.
|
||||
|
||||
set -ex
|
||||
|
||||
@ -99,12 +95,13 @@ fi
|
||||
# configuration, so we hardcode everything here rather than do it
|
||||
# from scratch
|
||||
case "$image" in
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11)
|
||||
CUDA_VERSION=12.6.3
|
||||
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.4.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -118,6 +115,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -132,6 +130,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -146,61 +145,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.13
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.13
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -215,6 +160,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -226,6 +172,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CLANG_VERSION=10
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
ONNX=yes
|
||||
@ -234,7 +181,10 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CLANG_VERSION=10
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
VULKAN_SDK_VERSION=1.2.162.1
|
||||
SWIFTSHADER=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
@ -242,7 +192,10 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.11
|
||||
CLANG_VERSION=10
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
VULKAN_SDK_VERSION=1.2.162.1
|
||||
SWIFTSHADER=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
@ -250,6 +203,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
@ -258,6 +212,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=6.2.4
|
||||
NINJA_VERSION=1.9.0
|
||||
@ -272,6 +227,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=6.3
|
||||
NINJA_VERSION=1.9.0
|
||||
@ -286,6 +242,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
XPU_VERSION=0.5
|
||||
NINJA_VERSION=1.9.0
|
||||
@ -296,6 +253,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
XPU_VERSION=2025.0
|
||||
NINJA_VERSION=1.9.0
|
||||
@ -306,6 +264,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
CONDA_CMAKE=yes
|
||||
@ -319,6 +278,7 @@ case "$image" in
|
||||
CUDNN_VERSION=9
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
@ -326,6 +286,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
@ -346,6 +307,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
CONDA_CMAKE=yes
|
||||
@ -360,7 +322,7 @@ case "$image" in
|
||||
EXECUTORCH=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.12-halide)
|
||||
CUDA_VERSION=12.6
|
||||
CUDA_VERSION=12.4
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=11
|
||||
CONDA_CMAKE=yes
|
||||
@ -368,7 +330,7 @@ case "$image" in
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.12-triton-cpu)
|
||||
CUDA_VERSION=12.6
|
||||
CUDA_VERSION=12.4
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=11
|
||||
CONDA_CMAKE=yes
|
||||
@ -378,19 +340,20 @@ case "$image" in
|
||||
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
|
||||
# We will need to update mypy version eventually, but that's for another day. The task
|
||||
# would be to upgrade mypy to 1.0.0 with Python 3.11
|
||||
PYTHON_VERSION=3.9
|
||||
PIP_CMAKE=yes
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter)
|
||||
PYTHON_VERSION=3.9
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CUDA_VERSION=11.8
|
||||
PIP_CMAKE=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
ACL=yes
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
# snadampal: skipping llvm src build install because the current version
|
||||
@ -402,6 +365,7 @@ case "$image" in
|
||||
GCC_VERSION=11
|
||||
ACL=yes
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
# snadampal: skipping llvm src build install because the current version
|
||||
@ -412,6 +376,7 @@ case "$image" in
|
||||
*)
|
||||
# Catch-all for builds that are not hardcoded.
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
echo "image '$image' did not match an existing build configuration"
|
||||
if [[ "$image" == *py* ]]; then
|
||||
@ -467,6 +432,7 @@ docker build \
|
||||
--build-arg "BUILD_ENVIRONMENT=${image}" \
|
||||
--build-arg "PROTOBUF=${PROTOBUF:-}" \
|
||||
--build-arg "LLVMDEV=${LLVMDEV:-}" \
|
||||
--build-arg "DB=${DB:-}" \
|
||||
--build-arg "VISION=${VISION:-}" \
|
||||
--build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
|
||||
--build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
|
||||
@ -474,12 +440,13 @@ docker build \
|
||||
--build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
|
||||
--build-arg "CLANG_VERSION=${CLANG_VERSION}" \
|
||||
--build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
|
||||
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
|
||||
--build-arg "GCC_VERSION=${GCC_VERSION}" \
|
||||
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
|
||||
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
|
||||
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
|
||||
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
|
||||
--build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
|
||||
--build-arg "SWIFTSHADER=${SWIFTSHADER}" \
|
||||
--build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
|
||||
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
||||
--build-arg "KATEX=${KATEX:-}" \
|
||||
@ -489,7 +456,6 @@ docker build \
|
||||
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
|
||||
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
|
||||
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
|
||||
--build-arg "PIP_CMAKE=${PIP_CMAKE}" \
|
||||
--build-arg "TRITON=${TRITON}" \
|
||||
--build-arg "TRITON_CPU=${TRITON_CPU}" \
|
||||
--build-arg "ONNX=${ONNX}" \
|
||||
|
||||
@ -55,6 +55,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
|
||||
@ -1 +1 @@
|
||||
01a22b6f16d117454b7d21ebdc691b0785b84a7f
|
||||
5e4d6b6380d575e48e37e9d987fded4ec588e7bc
|
||||
|
||||
@ -1 +1 @@
|
||||
v2.26.2-1
|
||||
v2.25.1-1
|
||||
|
||||
@ -1 +1 @@
|
||||
83111ab22be6e4a588d184ac45175986a7dde9fc
|
||||
e98b6fcb8df5b44eb0d0addb6767c573d37ba024
|
||||
|
||||
@ -1 +1 @@
|
||||
96316ce50fade7e209553aba4898cd9b82aab83b
|
||||
4b3bb1f8da0ded6ccd572dd1358ef45af5a1befe
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
set -euo pipefail
|
||||
|
||||
readonly version=v25.02
|
||||
readonly version=v24.04
|
||||
readonly src_host=https://github.com/ARM-software
|
||||
readonly src_repo=ComputeLibrary
|
||||
|
||||
|
||||
@ -4,10 +4,16 @@ set -ex
|
||||
|
||||
if [ -n "$CLANG_VERSION" ]; then
|
||||
|
||||
if [[ $UBUNTU_VERSION == 22.04 ]]; then
|
||||
if [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
|
||||
sudo apt-get update
|
||||
# gpg-agent is not available by default on 18.04
|
||||
sudo apt-get install -y --no-install-recommends gpg-agent
|
||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
|
||||
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
|
||||
# work around ubuntu apt-get conflicts
|
||||
sudo apt-get -y -f install
|
||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
if [[ $CLANG_VERSION == 18 ]]; then
|
||||
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
|
||||
fi
|
||||
@ -35,7 +41,7 @@ if [ -n "$CLANG_VERSION" ]; then
|
||||
# clang's packaging is a little messed up (the runtime libs aren't
|
||||
# added into the linker path), so give it a little help
|
||||
clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
|
||||
echo "$clang_lib" >/etc/ld.so.conf.d/clang.conf
|
||||
echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
|
||||
ldconfig
|
||||
|
||||
# Cleanup package manager
|
||||
|
||||
@ -66,7 +66,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
|
||||
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
|
||||
if [[ $(uname -m) == "aarch64" ]]; then
|
||||
conda_install "openblas==0.3.29=*openmp*"
|
||||
conda_install "openblas==0.3.28=*openmp*"
|
||||
else
|
||||
conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
|
||||
fi
|
||||
|
||||
@ -240,7 +240,7 @@ function prune_126 {
|
||||
}
|
||||
|
||||
function install_128 {
|
||||
CUDNN_VERSION=9.8.0.87
|
||||
CUDNN_VERSION=9.7.1.26
|
||||
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
|
||||
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
|
||||
# install CUDA 12.8.0 in the same container
|
||||
|
||||
@ -3,8 +3,19 @@
|
||||
|
||||
set -ex
|
||||
|
||||
NCCL_VERSION=v2.26.2-1
|
||||
CUDNN_VERSION=9.8.0.87
|
||||
NCCL_VERSION=v2.21.5-1
|
||||
CUDNN_VERSION=9.5.1.17
|
||||
|
||||
function install_cusparselt_062 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
mkdir tmp_cusparselt && pushd tmp_cusparselt
|
||||
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
|
||||
tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
|
||||
popd
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_cusparselt_063 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
@ -17,7 +28,140 @@ function install_cusparselt_063 {
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_124 {
|
||||
CUDNN_VERSION=9.1.0.70
|
||||
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
|
||||
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
|
||||
# install CUDA 12.4.1 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
chmod +x cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
./cuda_12.4.1_550.54.15_linux_sbsa.run --toolkit --silent
|
||||
rm -f cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_063
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function prune_124 {
|
||||
echo "Pruning CUDA 12.4"
|
||||
#####################################################################################
|
||||
# CUDA 12.4 prune static libs
|
||||
#####################################################################################
|
||||
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
|
||||
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
|
||||
|
||||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
|
||||
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||
export GENCODE=$OVERRIDE_GENCODE
|
||||
fi
|
||||
|
||||
# all CUDA libs except CuDNN and CuBLAS
|
||||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||
| xargs -I {} bash -c \
|
||||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||
|
||||
# prune CuDNN and CuBLAS
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||
|
||||
#####################################################################################
|
||||
# CUDA 12.4 prune visual tools
|
||||
#####################################################################################
|
||||
export CUDA_BASE="/usr/local/cuda-12.4/"
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
|
||||
}
|
||||
|
||||
function install_126 {
|
||||
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
|
||||
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
|
||||
# install CUDA 12.6.3 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run
|
||||
chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run
|
||||
./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent
|
||||
rm -f cuda_12.6.3_560.35.05_linux_sbsa.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_063
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function prune_126 {
|
||||
echo "Pruning CUDA 12.6"
|
||||
#####################################################################################
|
||||
# CUDA 12.6 prune static libs
|
||||
#####################################################################################
|
||||
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
|
||||
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
|
||||
|
||||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
|
||||
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||
export GENCODE=$OVERRIDE_GENCODE
|
||||
fi
|
||||
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
|
||||
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
|
||||
fi
|
||||
|
||||
# all CUDA libs except CuDNN and CuBLAS
|
||||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||
| xargs -I {} bash -c \
|
||||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||
|
||||
# prune CuDNN and CuBLAS
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||
|
||||
#####################################################################################
|
||||
# CUDA 12.6 prune visual tools
|
||||
#####################################################################################
|
||||
export CUDA_BASE="/usr/local/cuda-12.6/"
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
|
||||
}
|
||||
|
||||
function install_128 {
|
||||
CUDNN_VERSION=9.7.1.26
|
||||
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
|
||||
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
|
||||
# install CUDA 12.8.0 in the same container
|
||||
@ -54,6 +198,10 @@ function install_128 {
|
||||
while test $# -gt 0
|
||||
do
|
||||
case "$1" in
|
||||
12.4) install_124; prune_124
|
||||
;;
|
||||
12.6) install_126; prune_126
|
||||
;;
|
||||
12.8) install_128;
|
||||
;;
|
||||
*) echo "bad argument $1"; exit 1
|
||||
|
||||
@ -5,7 +5,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
|
||||
mkdir tmp_cudnn
|
||||
pushd tmp_cudnn
|
||||
if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.8.0.87_cuda12-archive"
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
|
||||
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
|
||||
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
|
||||
|
||||
38
.ci/docker/common/install_db.sh
Executable file
38
.ci/docker/common/install_db.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
install_ubuntu() {
|
||||
apt-get update
|
||||
|
||||
# Cleanup
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
}
|
||||
|
||||
install_centos() {
|
||||
# Need EPEL for many packages we depend on.
|
||||
# See http://fedoraproject.org/wiki/EPEL
|
||||
yum --enablerepo=extras install -y epel-release
|
||||
|
||||
# Cleanup
|
||||
yum clean all
|
||||
rm -rf /var/cache/yum
|
||||
rm -rf /var/lib/yum/yumdb
|
||||
rm -rf /var/lib/yum/history
|
||||
}
|
||||
|
||||
# Install base packages depending on the base OS
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
case "$ID" in
|
||||
ubuntu)
|
||||
install_ubuntu
|
||||
;;
|
||||
centos)
|
||||
install_centos
|
||||
;;
|
||||
*)
|
||||
echo "Unable to determine OS..."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@ -53,7 +53,7 @@ setup_executorch() {
|
||||
export EXECUTORCH_BUILD_PYBIND=ON
|
||||
export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||
|
||||
as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
|
||||
as_jenkins .ci/scripts/setup-linux.sh cmake || true
|
||||
popd
|
||||
}
|
||||
|
||||
|
||||
@ -2,6 +2,8 @@
|
||||
|
||||
set -ex
|
||||
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
if [ -n "${UBUNTU_VERSION}" ]; then
|
||||
apt update
|
||||
apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
|
||||
@ -13,8 +15,8 @@ chown -R jenkins pytorch
|
||||
|
||||
pushd pytorch
|
||||
# Install all linter dependencies
|
||||
pip install -r requirements.txt
|
||||
lintrunner init
|
||||
pip_install -r requirements.txt
|
||||
conda_run lintrunner init
|
||||
|
||||
# Cache .lintbin directory as part of the Docker image
|
||||
cp -r .lintbin /tmp
|
||||
|
||||
@ -4,15 +4,10 @@ set -ex
|
||||
|
||||
[ -n "$NINJA_VERSION" ]
|
||||
|
||||
arch=$(uname -m)
|
||||
if [ "$arch" == "aarch64" ]; then
|
||||
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux-aarch64.zip"
|
||||
else
|
||||
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
|
||||
fi
|
||||
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
|
||||
|
||||
pushd /tmp
|
||||
wget --no-verbose --output-document=ninja-linux.zip "$url"
|
||||
unzip ninja-linux.zip -d /usr/local/bin
|
||||
rm -f ninja-linux.zip
|
||||
popd
|
||||
popd
|
||||
|
||||
@ -32,7 +32,7 @@ pip_install coloredlogs packaging
|
||||
|
||||
pip_install onnxruntime==1.18.1
|
||||
pip_install onnx==1.17.0
|
||||
pip_install onnxscript==0.2.2 --no-deps
|
||||
pip_install onnxscript==0.1.0 --no-deps
|
||||
# required by onnxscript
|
||||
pip_install ml_dtypes
|
||||
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
set -ex
|
||||
|
||||
cd /
|
||||
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.29 --depth 1 --shallow-submodules
|
||||
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.28 --depth 1 --shallow-submodules
|
||||
|
||||
|
||||
OPENBLAS_BUILD_FLAGS="
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
apt-get update
|
||||
# Use deadsnakes in case we need an older python version
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python3-pip python${PYTHON_VERSION}-venv
|
||||
|
||||
# Use a venv because uv and some other package managers don't support --user install
|
||||
ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
|
||||
python -m venv /var/lib/jenkins/ci_env
|
||||
source /var/lib/jenkins/ci_env/bin/activate
|
||||
|
||||
python -mpip install --upgrade pip
|
||||
python -mpip install -r /opt/requirements-ci.txt
|
||||
if [ -n "${PIP_CMAKE}" ]; then
|
||||
python -mpip install cmake==3.31.6
|
||||
fi
|
||||
@ -8,6 +8,10 @@ ver() {
|
||||
|
||||
install_ubuntu() {
|
||||
apt-get update
|
||||
if [[ $UBUNTU_VERSION == 18.04 ]]; then
|
||||
# gpg-agent is not available by default on 18.04
|
||||
apt-get install -y --no-install-recommends gpg-agent
|
||||
fi
|
||||
if [[ $UBUNTU_VERSION == 20.04 ]]; then
|
||||
# gpg-agent is not available by default on 20.04
|
||||
apt-get install -y --no-install-recommends gpg-agent
|
||||
|
||||
@ -25,9 +25,7 @@ python3 -m pip install meson ninja
|
||||
###########################
|
||||
### clone repo
|
||||
###########################
|
||||
# TEMPORARY FIX: https://gitlab.freedesktop.org/mesa/drm.git is down until 2025/03/22
|
||||
# GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
|
||||
GIT_SSL_NO_VERIFY=true git clone git://anongit.freedesktop.org/mesa/drm
|
||||
GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
|
||||
pushd drm
|
||||
|
||||
###########################
|
||||
@ -117,7 +115,7 @@ index a5007ffc..13fa07fc 100644
|
||||
if (!fp) {
|
||||
- fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
|
||||
- strerror(errno));
|
||||
+ //fprintf(stderr, "amdgpu.ids: No such file or directory\n");
|
||||
+ fprintf(stderr, "amdgpu.ids: No such file or directory\n");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
24
.ci/docker/common/install_swiftshader.sh
Executable file
24
.ci/docker/common/install_swiftshader.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
[ -n "${SWIFTSHADER}" ]
|
||||
|
||||
retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
}
|
||||
|
||||
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
|
||||
|
||||
# SwiftShader
|
||||
_swiftshader_dir=/var/lib/jenkins/swiftshader
|
||||
_swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz
|
||||
mkdir -p $_swiftshader_dir
|
||||
_tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}"
|
||||
|
||||
curl --silent --show-error --location --fail --retry 3 \
|
||||
--output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}"
|
||||
|
||||
tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}"
|
||||
|
||||
export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json"
|
||||
@ -60,15 +60,15 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}"
|
||||
# Triton needs at least gcc-9 to build
|
||||
apt-get install -y g++-9
|
||||
|
||||
CXX=g++-9 pip_install .
|
||||
CXX=g++-9 pip_install -e .
|
||||
elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
|
||||
# Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get install -y g++-9
|
||||
|
||||
CXX=g++-9 pip_install .
|
||||
CXX=g++-9 pip_install -e .
|
||||
else
|
||||
pip_install .
|
||||
pip_install -e .
|
||||
fi
|
||||
|
||||
if [ -n "${CONDA_CMAKE}" ]; then
|
||||
|
||||
24
.ci/docker/common/install_vulkan_sdk.sh
Executable file
24
.ci/docker/common/install_vulkan_sdk.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
[ -n "${VULKAN_SDK_VERSION}" ]
|
||||
|
||||
retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
}
|
||||
|
||||
_vulkansdk_dir=/var/lib/jenkins/vulkansdk
|
||||
_tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz
|
||||
|
||||
curl \
|
||||
--silent \
|
||||
--show-error \
|
||||
--location \
|
||||
--fail \
|
||||
--retry 3 \
|
||||
--output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
|
||||
|
||||
mkdir -p "${_vulkansdk_dir}"
|
||||
tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
|
||||
rm -rf "${_tmp_vulkansdk_targz}"
|
||||
@ -39,7 +39,7 @@ case ${GPU_ARCH_TYPE} in
|
||||
BASE_TARGET=rocm
|
||||
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx942"
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
|
||||
;;
|
||||
*)
|
||||
|
||||
@ -18,14 +18,15 @@ COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
|
||||
# Install conda and other packages (e.g., numpy, pytest)
|
||||
ARG PYTHON_VERSION
|
||||
ARG PIP_CMAKE
|
||||
# Put venv into the env vars so users don't need to activate it
|
||||
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
|
||||
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
|
||||
COPY requirements-ci.txt /opt/requirements-ci.txt
|
||||
COPY ./common/install_python.sh install_python.sh
|
||||
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ARG CONDA_CMAKE
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./common/install_magma_conda.sh install_magma_conda.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
|
||||
# Install cuda and cudnn
|
||||
ARG CUDA_VERSION
|
||||
@ -36,10 +37,9 @@ ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
|
||||
|
||||
# Note that Docker build forbids copying file outside the build context
|
||||
COPY ./common/install_linter.sh install_linter.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_linter.sh
|
||||
RUN rm install_linter.sh
|
||||
|
||||
RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env
|
||||
RUN rm install_linter.sh common_utils.sh
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
||||
|
||||
@ -15,18 +15,20 @@ COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
|
||||
# Install conda and other packages (e.g., numpy, pytest)
|
||||
ARG PYTHON_VERSION
|
||||
ARG PIP_CMAKE
|
||||
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
|
||||
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
|
||||
COPY requirements-ci.txt /opt/requirements-ci.txt
|
||||
COPY ./common/install_python.sh install_python.sh
|
||||
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ARG CONDA_CMAKE
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
|
||||
# Note that Docker build forbids copying file outside the build context
|
||||
COPY ./common/install_linter.sh install_linter.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_linter.sh
|
||||
RUN rm install_linter.sh
|
||||
RUN rm install_linter.sh common_utils.sh
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
||||
|
||||
153
.ci/docker/manywheel/Dockerfile_2014
Normal file
153
.ci/docker/manywheel/Dockerfile_2014
Normal file
@ -0,0 +1,153 @@
|
||||
# syntax = docker/dockerfile:experimental
|
||||
ARG ROCM_VERSION=3.7
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
ARG GPU_IMAGE=nvidia/cuda:${BASE_CUDA_VERSION}-devel-centos7
|
||||
FROM quay.io/pypa/manylinux2014_x86_64 as base
|
||||
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US.UTF-8
|
||||
|
||||
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
|
||||
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel
|
||||
RUN yum install -y yum-utils centos-release-scl sudo
|
||||
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
|
||||
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
|
||||
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
|
||||
|
||||
# cmake
|
||||
RUN yum install -y cmake3 && \
|
||||
ln -s /usr/bin/cmake3 /usr/bin/cmake
|
||||
FROM base as openssl
|
||||
# Install openssl (this must precede `build python` step)
|
||||
# (In order to have a proper SSL module, Python is compiled
|
||||
# against a recent openssl [see env vars above], which is linked
|
||||
# statically. We delete openssl afterwards.)
|
||||
ADD ./common/install_openssl.sh install_openssl.sh
|
||||
RUN bash ./install_openssl.sh && rm install_openssl.sh
|
||||
|
||||
|
||||
|
||||
# remove unncessary python versions
|
||||
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
|
||||
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
|
||||
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
|
||||
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
|
||||
|
||||
FROM base as cuda
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
# Install CUDA
|
||||
ADD ./common/install_cuda.sh install_cuda.sh
|
||||
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
|
||||
|
||||
FROM base as intel
|
||||
# MKL
|
||||
ADD ./common/install_mkl.sh install_mkl.sh
|
||||
RUN bash ./install_mkl.sh && rm install_mkl.sh
|
||||
|
||||
FROM base as magma
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
# Install magma
|
||||
ADD ./common/install_magma.sh install_magma.sh
|
||||
RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
|
||||
|
||||
FROM base as jni
|
||||
# Install java jni header
|
||||
ADD ./common/install_jni.sh install_jni.sh
|
||||
ADD ./java/jni.h jni.h
|
||||
RUN bash ./install_jni.sh && rm install_jni.sh
|
||||
|
||||
FROM base as libpng
|
||||
# Install libpng
|
||||
ADD ./common/install_libpng.sh install_libpng.sh
|
||||
RUN bash ./install_libpng.sh && rm install_libpng.sh
|
||||
|
||||
FROM ${GPU_IMAGE} as common
|
||||
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US.UTF-8
|
||||
RUN yum install -y \
|
||||
aclocal \
|
||||
autoconf \
|
||||
automake \
|
||||
bison \
|
||||
bzip2 \
|
||||
curl \
|
||||
diffutils \
|
||||
file \
|
||||
git \
|
||||
make \
|
||||
patch \
|
||||
perl \
|
||||
unzip \
|
||||
util-linux \
|
||||
wget \
|
||||
which \
|
||||
xz \
|
||||
yasm
|
||||
RUN yum install -y \
|
||||
https://repo.ius.io/ius-release-el7.rpm \
|
||||
https://ossci-linux.s3.amazonaws.com/epel-release-7-14.noarch.rpm
|
||||
|
||||
RUN yum swap -y git git236-core
|
||||
# git236+ would refuse to run git commands in repos owned by other users
|
||||
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
|
||||
# Override this behaviour by treating every folder as safe
|
||||
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
|
||||
RUN git config --global --add safe.directory "*"
|
||||
|
||||
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
|
||||
# Install LLVM version
|
||||
COPY --from=openssl /opt/openssl /opt/openssl
|
||||
COPY --from=base /opt/python /opt/python
|
||||
COPY --from=base /opt/_internal /opt/_internal
|
||||
COPY --from=base /usr/local/bin/auditwheel /usr/local/bin/auditwheel
|
||||
COPY --from=intel /opt/intel /opt/intel
|
||||
COPY --from=base /usr/local/bin/patchelf /usr/local/bin/patchelf
|
||||
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
|
||||
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
|
||||
COPY --from=libpng /usr/local/include/png* /usr/local/include/
|
||||
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
|
||||
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
|
||||
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
|
||||
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
|
||||
|
||||
FROM common as cpu_final
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
RUN yum install -y yum-utils centos-release-scl
|
||||
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
|
||||
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
|
||||
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
|
||||
|
||||
# cmake
|
||||
RUN yum install -y cmake3 && \
|
||||
ln -s /usr/bin/cmake3 /usr/bin/cmake
|
||||
|
||||
# ninja
|
||||
RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-1.noarch.rpm
|
||||
RUN yum install -y ninja-build
|
||||
|
||||
FROM cpu_final as cuda_final
|
||||
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
|
||||
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
|
||||
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
|
||||
|
||||
FROM common as rocm_final
|
||||
ARG ROCM_VERSION=3.7
|
||||
# Install ROCm
|
||||
ADD ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
|
||||
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
|
||||
# cmake3 is needed for the later MIOpen custom build, so that step is last.
|
||||
RUN yum install -y cmake3 && \
|
||||
rm -f /usr/bin/cmake && \
|
||||
ln -s /usr/bin/cmake3 /usr/bin/cmake
|
||||
ADD ./common/install_miopen.sh install_miopen.sh
|
||||
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
|
||||
@ -38,12 +38,6 @@ RUN yum install -y \
|
||||
sudo \
|
||||
gcc-toolset-${GCCTOOLSET_VERSION}-toolchain
|
||||
|
||||
# (optional) Install non-default Ninja version
|
||||
ARG NINJA_VERSION
|
||||
COPY ./common/install_ninja.sh install_ninja.sh
|
||||
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
|
||||
RUN rm install_ninja.sh
|
||||
|
||||
# Ensure the expected devtoolset is used
|
||||
ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
|
||||
|
||||
@ -42,7 +42,6 @@ RUN yum install -y \
|
||||
llvm-devel \
|
||||
libzstd-devel \
|
||||
python3.12-devel \
|
||||
python3.12-test \
|
||||
python3.12-setuptools \
|
||||
python3.12-pip \
|
||||
python3-virtualenv \
|
||||
@ -102,33 +101,24 @@ CMD ["/bin/bash"]
|
||||
|
||||
# install test dependencies:
|
||||
# - grpcio requires system openssl, bundled crypto fails to build
|
||||
# - ml_dtypes 0.4.0 requires some fixes provided in later commits to build
|
||||
RUN dnf install -y \
|
||||
protobuf-devel \
|
||||
protobuf-c-devel \
|
||||
protobuf-lite-devel \
|
||||
hdf5-devel \
|
||||
python3-h5py \
|
||||
git
|
||||
wget \
|
||||
patch
|
||||
|
||||
RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
|
||||
|
||||
# cmake-3.28.0 from pip for onnxruntime
|
||||
RUN python3 -mpip install cmake==3.28.0
|
||||
|
||||
# build onnxruntime 1.21.0 from sources.
|
||||
# it is not possible to build it from sources using pip,
|
||||
# so just build it from upstream repository.
|
||||
# h5py is dependency of onnxruntime_training.
|
||||
# h5py==3.11.0 builds with hdf5-devel 1.10.5 from repository.
|
||||
# install newest flatbuffers version first:
|
||||
# for some reason old version is getting pulled in otherwise.
|
||||
# packaging package is required for onnxruntime wheel build.
|
||||
RUN pip3 install flatbuffers && \
|
||||
pip3 install h5py==3.11.0 && \
|
||||
pip3 install packaging && \
|
||||
git clone https://github.com/microsoft/onnxruntime && \
|
||||
cd onnxruntime && git checkout v1.21.0 && \
|
||||
RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio==1.65.4
|
||||
RUN cd ~ && \
|
||||
git clone https://github.com/jax-ml/ml_dtypes && \
|
||||
cd ml_dtypes && \
|
||||
git checkout v0.4.0 && \
|
||||
git submodule update --init --recursive && \
|
||||
./build.sh --config Release --parallel 0 --enable_pybind --build_wheel --enable_training --enable_training_apis --enable_training_ops --skip_tests --allow_running_as_root && \
|
||||
pip3 install ./build/Linux/Release/dist/onnxruntime_training-*.whl && \
|
||||
cd .. && /bin/rm -rf ./onnxruntime
|
||||
wget https://github.com/jax-ml/ml_dtypes/commit/b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
|
||||
wget https://github.com/jax-ml/ml_dtypes/commit/d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
|
||||
patch -p1 < b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
|
||||
patch -p1 < d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
|
||||
python3 setup.py bdist_wheel && \
|
||||
pip3 install dist/*.whl && \
|
||||
rm -rf ml_dtypes
|
||||
|
||||
@ -48,7 +48,7 @@ case ${GPU_ARCH_TYPE} in
|
||||
TARGET=final
|
||||
DOCKER_TAG=cpu-aarch64
|
||||
GPU_IMAGE=arm64v8/almalinux:8
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11 --build-arg NINJA_VERSION=1.12.1"
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
|
||||
MANY_LINUX_VERSION="2_28_aarch64"
|
||||
;;
|
||||
cpu-cxx11-abi)
|
||||
@ -97,7 +97,7 @@ case ${GPU_ARCH_TYPE} in
|
||||
DEVTOOLSET_VERSION="11"
|
||||
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
||||
fi
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101"
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
||||
;;
|
||||
xpu)
|
||||
@ -121,8 +121,7 @@ fi
|
||||
(
|
||||
set -x
|
||||
|
||||
# Only activate this if in CI
|
||||
if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
|
||||
if [ "$(uname -m)" != "s390x" ]; then
|
||||
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
|
||||
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
|
||||
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
|
||||
@ -140,7 +139,7 @@ fi
|
||||
"${TOPDIR}/.ci/docker/"
|
||||
)
|
||||
|
||||
GITHUB_REF=${GITHUB_REF:-"dev")}
|
||||
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
|
||||
GIT_BRANCH_NAME=${GITHUB_REF##*/}
|
||||
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
|
||||
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# Script used only in CD pipeline
|
||||
|
||||
OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/
|
||||
CURL_DOWNLOAD_URL=https://curl.se/download
|
||||
CURL_DOWNLOAD_URL=https://curl.askapache.com/download
|
||||
|
||||
AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf
|
||||
|
||||
|
||||
@ -41,14 +41,11 @@ fbscribelogger==0.1.7
|
||||
#Pinned versions: 0.1.6
|
||||
#test that import:
|
||||
|
||||
flatbuffers==2.0 ; platform_machine != "s390x"
|
||||
flatbuffers==2.0
|
||||
#Description: cross platform serialization library
|
||||
#Pinned versions: 2.0
|
||||
#test that import:
|
||||
|
||||
flatbuffers ; platform_machine == "s390x"
|
||||
#Description: cross platform serialization library; Newer version is required on s390x for new python version
|
||||
|
||||
hypothesis==5.35.1
|
||||
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
|
||||
#Description: advanced library for generating parametrized tests
|
||||
@ -93,10 +90,10 @@ librosa>=0.6.2 ; python_version < "3.11"
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
mypy==1.14.0
|
||||
mypy==1.13.0
|
||||
# Pin MyPy version because new errors are likely to appear with each release
|
||||
#Description: linter
|
||||
#Pinned versions: 1.14.0
|
||||
#Pinned versions: 1.10.0
|
||||
#test that import: test_typing.py, test_type_hints.py
|
||||
|
||||
networkx==2.8.8
|
||||
@ -105,10 +102,10 @@ networkx==2.8.8
|
||||
#Pinned versions: 2.8.8
|
||||
#test that import: functorch
|
||||
|
||||
ninja==1.11.1.3
|
||||
#Description: build system. Used in some tests. Used in build to generate build
|
||||
#time tracing information
|
||||
#Pinned versions: 1.11.1.3
|
||||
#ninja
|
||||
#Description: build system. Note that it install from
|
||||
#here breaks things so it is commented out
|
||||
#Pinned versions: 1.10.0.post1
|
||||
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
||||
|
||||
numba==0.49.0 ; python_version < "3.9"
|
||||
@ -297,7 +294,7 @@ ghstack==0.8.0
|
||||
#Pinned versions: 0.8.0
|
||||
#test that import:
|
||||
|
||||
jinja2==3.1.6
|
||||
jinja2==3.1.5
|
||||
#Description: jinja2 template engine
|
||||
#Pinned versions: 3.1.4
|
||||
#test that import:
|
||||
@ -342,7 +339,7 @@ onnx==1.17.0
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
onnxscript==0.2.2
|
||||
onnxscript==0.1.0
|
||||
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
@ -368,6 +365,7 @@ PyYAML
|
||||
pyzstd
|
||||
setuptools
|
||||
|
||||
ninja==1.11.1 ; platform_machine == "aarch64"
|
||||
scons==4.5.2 ; platform_machine == "aarch64"
|
||||
|
||||
pulp==2.9.0 ; python_version >= "3.8"
|
||||
|
||||
@ -1 +1 @@
|
||||
3.3.0
|
||||
3.2.0
|
||||
|
||||
@ -50,6 +50,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
|
||||
@ -50,6 +50,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
|
||||
@ -77,6 +77,13 @@ COPY triton_version.txt triton_version.txt
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton-xpu.txt triton_version.txt
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
|
||||
@ -74,6 +74,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
@ -81,6 +88,18 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
|
||||
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
|
||||
ENV INSTALLED_VISION ${VISION}
|
||||
|
||||
# (optional) Install Vulkan SDK
|
||||
ARG VULKAN_SDK_VERSION
|
||||
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
|
||||
RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
|
||||
RUN rm install_vulkan_sdk.sh
|
||||
|
||||
# (optional) Install swiftshader
|
||||
ARG SWIFTSHADER
|
||||
COPY ./common/install_swiftshader.sh install_swiftshader.sh
|
||||
RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
|
||||
RUN rm install_swiftshader.sh
|
||||
|
||||
# (optional) Install non-default CMake version
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./common/install_cmake.sh install_cmake.sh
|
||||
|
||||
@ -12,7 +12,7 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
||||
-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \
|
||||
-e DESIRED_CUDA=${DESIRED_CUDA} \
|
||||
-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \
|
||||
"pytorch/manylinux2_28-builder:cuda${DESIRED_CUDA}-main" \
|
||||
"pytorch/manylinux-builder:cuda${DESIRED_CUDA}-main" \
|
||||
magma/build_magma.sh
|
||||
|
||||
.PHONY: all
|
||||
|
||||
@ -111,6 +111,12 @@ case ${DESIRED_PYTHON} in
|
||||
;;
|
||||
esac
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
export _GLIBCXX_USE_CXX11_ABI=1
|
||||
else
|
||||
export _GLIBCXX_USE_CXX11_ABI=0
|
||||
fi
|
||||
|
||||
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
echo "Calling build_amd.py at $(date)"
|
||||
python tools/amd_build/build_amd.py
|
||||
@ -203,6 +209,12 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
||||
|
||||
mkdir -p /tmp/$LIBTORCH_HOUSE_DIR
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
LIBTORCH_ABI="cxx11-abi-"
|
||||
else
|
||||
LIBTORCH_ABI=
|
||||
fi
|
||||
|
||||
zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
|
||||
cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
|
||||
/tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
|
||||
|
||||
@ -54,11 +54,11 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
|
||||
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
|
||||
case ${CUDA_VERSION} in
|
||||
12.8)
|
||||
TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX" #removing sm_50-sm_70 as these architectures are deprecated in CUDA 12.8 and will be removed in future releases
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0;10.0;12.0+PTX" #Ripping out 5.0 and 6.0 due to ld error
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
12.6)
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
12.4)
|
||||
|
||||
@ -95,6 +95,12 @@ python setup.py clean
|
||||
retry pip install -qr requirements.txt
|
||||
retry pip install -q numpy==2.0.1
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
export _GLIBCXX_USE_CXX11_ABI=1
|
||||
else
|
||||
export _GLIBCXX_USE_CXX11_ABI=0
|
||||
fi
|
||||
|
||||
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
echo "Calling build_amd.py at $(date)"
|
||||
python tools/amd_build/build_amd.py
|
||||
@ -163,6 +169,12 @@ fi
|
||||
|
||||
)
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
LIBTORCH_ABI="cxx11-abi-"
|
||||
else
|
||||
LIBTORCH_ABI=
|
||||
fi
|
||||
|
||||
(
|
||||
set -x
|
||||
|
||||
|
||||
@ -173,7 +173,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
||||
source /opt/intel/oneapi/compiler/latest/env/vars.sh
|
||||
# XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA
|
||||
export USE_KINETO=0
|
||||
export TORCH_XPU_ARCH_LIST=pvc
|
||||
fi
|
||||
|
||||
# sccache will fail for CUDA builds if all cores are used for compiling
|
||||
@ -192,7 +191,7 @@ fi
|
||||
|
||||
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
|
||||
# memory to build and will OOM
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]] && [ -z "$MAX_JOBS_OVERRIDE" ]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then
|
||||
echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM"
|
||||
echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage"
|
||||
export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
|
||||
@ -378,10 +377,8 @@ else
|
||||
# This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
|
||||
# is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
|
||||
# 16 CPUs
|
||||
if [ -z "$MAX_JOBS_OVERRIDE" ]; then
|
||||
MAX_JOBS=$(nproc --ignore=4)
|
||||
export MAX_JOBS
|
||||
fi
|
||||
MAX_JOBS=$(nproc --ignore=4)
|
||||
export MAX_JOBS
|
||||
|
||||
# NB: Install outside of source directory (at the same level as the root
|
||||
# pytorch folder) so that it doesn't get cleaned away prior to docker push.
|
||||
|
||||
@ -73,14 +73,26 @@ fi
|
||||
# Check GCC ABI
|
||||
###############################################################################
|
||||
|
||||
# NOTE: As of https://github.com/pytorch/pytorch/issues/126551 we only produce
|
||||
# wheels with cxx11-abi
|
||||
# NOTE [ Building libtorch with old vs. new gcc ABI ]
|
||||
#
|
||||
# Packages built with one version of ABI could not be linked against by client
|
||||
# C++ libraries that were compiled using the other version of ABI. Since both
|
||||
# gcc ABIs are still common in the wild, we need to support both ABIs. Currently:
|
||||
#
|
||||
# - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI.
|
||||
# - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI.
|
||||
|
||||
echo "Checking that the gcc ABI is what we expect"
|
||||
if [[ "$(uname)" != 'Darwin' ]]; then
|
||||
function is_expected() {
|
||||
if [[ "$1" -gt 0 || "$1" == "ON " ]]; then
|
||||
echo 1
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* || "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
if [[ "$1" -gt 0 || "$1" == "ON " ]]; then
|
||||
echo 1
|
||||
fi
|
||||
else
|
||||
if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then
|
||||
echo 1
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
@ -196,11 +208,35 @@ setup_link_flags () {
|
||||
|
||||
TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code"
|
||||
build_and_run_example_cpp () {
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
GLIBCXX_USE_CXX11_ABI=1
|
||||
else
|
||||
GLIBCXX_USE_CXX11_ABI=0
|
||||
fi
|
||||
setup_link_flags
|
||||
g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
|
||||
g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
|
||||
./$1
|
||||
}
|
||||
|
||||
build_example_cpp_with_incorrect_abi () {
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
GLIBCXX_USE_CXX11_ABI=0
|
||||
else
|
||||
GLIBCXX_USE_CXX11_ABI=1
|
||||
fi
|
||||
set +e
|
||||
setup_link_flags
|
||||
g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
|
||||
ERRCODE=$?
|
||||
set -e
|
||||
if [ "$ERRCODE" -eq "0" ]; then
|
||||
echo "Building example with incorrect ABI didn't throw error. Aborting."
|
||||
exit 1
|
||||
else
|
||||
echo "Building example with incorrect ABI throws expected error. Proceeding."
|
||||
fi
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
# Check simple Python/C++ calls
|
||||
###############################################################################
|
||||
@ -210,6 +246,11 @@ if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
|
||||
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
|
||||
fi
|
||||
build_and_run_example_cpp simple-torch-test
|
||||
# `_GLIBCXX_USE_CXX11_ABI` is always ignored by gcc in devtoolset7, so we test
|
||||
# the expected failure case for Ubuntu 16.04 + gcc 5.4 only.
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
build_example_cpp_with_incorrect_abi simple-torch-test
|
||||
fi
|
||||
else
|
||||
pushd /tmp
|
||||
python -c 'import torch'
|
||||
|
||||
@ -121,9 +121,9 @@ def main() -> None:
|
||||
else:
|
||||
install_root = Path(distutils.sysconfig.get_python_lib()) / "torch"
|
||||
|
||||
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
|
||||
# NOTE: All binaries are built with cxx11abi now
|
||||
check_lib_symbols_for_abi_correctness(libtorch_cpu_path, False)
|
||||
libtorch_cpu_path = install_root / "lib" / "libtorch_cpu.so"
|
||||
pre_cxx11_abi = "cxx11-abi" not in os.getenv("DESIRED_DEVTOOLSET", "")
|
||||
check_lib_symbols_for_abi_correctness(libtorch_cpu_path, pre_cxx11_abi)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@ -46,9 +46,7 @@ def train(args, model, device, train_loader, optimizer, epoch):
|
||||
optimizer.step()
|
||||
if batch_idx % args.log_interval == 0:
|
||||
print(
|
||||
f"Train Epoch: {epoch} "
|
||||
f"[{batch_idx * len(data)}/{len(train_loader.dataset)} "
|
||||
f"({100.0 * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}"
|
||||
f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}" # noqa: B950
|
||||
)
|
||||
if args.dry_run:
|
||||
break
|
||||
@ -73,9 +71,7 @@ def test(model, device, test_loader):
|
||||
test_loss /= len(test_loader.dataset)
|
||||
|
||||
print(
|
||||
f"\nTest set: Average loss: {test_loss:.4f}, "
|
||||
f"Accuracy: {correct}/{len(test_loader.dataset)} "
|
||||
f"({100.0 * correct / len(test_loader.dataset):.0f}%)\n"
|
||||
f"\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n" # noqa: B950
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -76,13 +76,10 @@ def read_release_matrix():
|
||||
|
||||
|
||||
def test_numpy():
|
||||
try:
|
||||
import numpy as np
|
||||
import numpy as np
|
||||
|
||||
x = np.arange(5)
|
||||
torch.tensor(x)
|
||||
except ImportError:
|
||||
print("Numpy check skipped. Numpy is not installed.")
|
||||
x = np.arange(5)
|
||||
torch.tensor(x)
|
||||
|
||||
|
||||
def check_version(package: str) -> None:
|
||||
@ -169,10 +166,6 @@ def test_cuda_gds_errors_captured() -> None:
|
||||
major_version = int(torch.version.cuda.split(".")[0])
|
||||
minor_version = int(torch.version.cuda.split(".")[1])
|
||||
|
||||
if target_os == "windows":
|
||||
print(f"{target_os} is not supported for GDS smoke test")
|
||||
return
|
||||
|
||||
if major_version < 12 or (major_version == 12 and minor_version < 6):
|
||||
print("CUDA version is not supported for GDS smoke test")
|
||||
return
|
||||
@ -413,7 +406,6 @@ def main() -> None:
|
||||
smoke_test_conv2d()
|
||||
test_linalg()
|
||||
test_numpy()
|
||||
|
||||
if is_cuda_system:
|
||||
test_linalg("cuda")
|
||||
test_cuda_gds_errors_captured()
|
||||
|
||||
@ -314,13 +314,6 @@ test_python() {
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
test_lazy_tensor_meta_reference_disabled() {
|
||||
export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
|
||||
echo "Testing lazy tensor operations without meta reference"
|
||||
time python test/run_test.py --include lazy/test_ts_opinfo.py --verbose
|
||||
export -n TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE
|
||||
}
|
||||
|
||||
|
||||
test_dynamo_wrapped_shard() {
|
||||
if [[ -z "$NUM_TEST_SHARDS" ]]; then
|
||||
@ -483,8 +476,6 @@ elif [[ "${TEST_CONFIG}" == *aot_eager* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager)
|
||||
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--export-aot-inductor)
|
||||
elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--inductor --inductor-compile-mode max-autotune)
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* && "${TEST_CONFIG}" != *perf* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--inductor)
|
||||
fi
|
||||
@ -499,59 +490,6 @@ else
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
|
||||
fi
|
||||
|
||||
test_cachebench() {
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
|
||||
local BENCHMARK
|
||||
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
||||
local BENCHMARK=torchbench
|
||||
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
|
||||
local BENCHMARK=huggingface
|
||||
else
|
||||
echo "invalid SHARD_NUMBER: ${SHARD_NUMBER}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local mode_options=("training" "inference")
|
||||
|
||||
for mode in "${mode_options[@]}"; do
|
||||
$TASKSET python "benchmarks/dynamo/cachebench.py" \
|
||||
--mode "$mode" \
|
||||
--device cuda \
|
||||
--benchmark "$BENCHMARK" \
|
||||
--repeat 3 \
|
||||
--output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}.json"
|
||||
|
||||
$TASKSET python "benchmarks/dynamo/cachebench.py" \
|
||||
--mode "$mode" \
|
||||
--dynamic \
|
||||
--device cuda \
|
||||
--benchmark "$BENCHMARK" \
|
||||
--repeat 3 \
|
||||
--output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}_dynamic.json"
|
||||
done
|
||||
}
|
||||
|
||||
test_verify_cachebench() {
|
||||
TMP_TEST_REPORTS_DIR=$(mktemp -d)
|
||||
TEST_OUTPUT="$TMP_TEST_REPORTS_DIR/test.json"
|
||||
|
||||
$TASKSET python "benchmarks/dynamo/cachebench.py" \
|
||||
--mode training \
|
||||
--device cpu \
|
||||
--model nanogpt \
|
||||
--benchmark torchbench \
|
||||
--output "$TEST_OUTPUT"
|
||||
|
||||
# -s checks file exists and is non empty
|
||||
if [[ ! -s "$TEST_OUTPUT" ]]; then
|
||||
echo "Cachebench failed to produce an output."
|
||||
echo "Run 'python benchmarks/dynamo/cachebench.py' to make sure it works"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
test_perf_for_dashboard() {
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
@ -580,8 +518,6 @@ test_perf_for_dashboard() {
|
||||
test_inductor_set_cpu_affinity
|
||||
elif [[ "${TEST_CONFIG}" == *cuda_a10g* ]]; then
|
||||
device=cuda_a10g
|
||||
elif [[ "${TEST_CONFIG}" == *h100* ]]; then
|
||||
device=cuda_h100
|
||||
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
|
||||
device=rocm
|
||||
fi
|
||||
@ -762,8 +698,6 @@ test_dynamo_benchmark() {
|
||||
fi
|
||||
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
else
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
|
||||
@ -1481,7 +1415,7 @@ test_executorch() {
|
||||
bash examples/models/llama3_2_vision/install_requirements.sh
|
||||
# NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch
|
||||
# from the PR
|
||||
bash .ci/scripts/setup-linux.sh --build-tool cmake
|
||||
bash .ci/scripts/setup-linux.sh cmake
|
||||
|
||||
echo "Run ExecuTorch unit tests"
|
||||
pytest -v -n auto
|
||||
@ -1505,7 +1439,7 @@ test_executorch() {
|
||||
test_linux_aarch64() {
|
||||
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
|
||||
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
|
||||
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \
|
||||
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops \
|
||||
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
|
||||
|
||||
# Dynamo tests
|
||||
@ -1573,16 +1507,6 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
|
||||
install_torchvision
|
||||
id=$((SHARD_NUMBER-1))
|
||||
test_dynamo_benchmark timm_models "$id"
|
||||
elif [[ "${TEST_CONFIG}" == cachebench ]]; then
|
||||
install_torchaudio cuda
|
||||
install_torchvision
|
||||
checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco
|
||||
PYTHONPATH=$(pwd)/torchbench test_cachebench
|
||||
elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
|
||||
install_torchaudio cpu
|
||||
install_torchvision
|
||||
checkout_install_torchbench nanogpt
|
||||
PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
|
||||
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
||||
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
|
||||
install_torchaudio cpu
|
||||
@ -1619,7 +1543,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
|
||||
install_torchvision
|
||||
checkout_install_torchbench hf_T5 llama moco
|
||||
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
|
||||
test_inductor_aoti
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
|
||||
install_torchvision
|
||||
test_inductor_shard "${SHARD_NUMBER}"
|
||||
@ -1639,7 +1562,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
|
||||
test_python_shard "$SHARD_NUMBER"
|
||||
test_aten
|
||||
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
test_lazy_tensor_meta_reference_disabled
|
||||
test_without_numpy
|
||||
install_torchvision
|
||||
test_python_shard 1
|
||||
|
||||
@ -17,24 +17,32 @@ curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
|
||||
:: Install the Visual Studio Build Tools with C++ components
|
||||
echo Installing Visual Studio Build Tools with C++ components...
|
||||
echo Installing MSVC %MSVC_VERSION%
|
||||
"%INSTALLER_FILE%" --norestart --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
|
||||
--add Microsoft.VisualStudio.Workload.VCTools ^
|
||||
--add Microsoft.VisualStudio.Component.Windows10SDK ^
|
||||
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.ASAN ^
|
||||
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
|
||||
--add Microsoft.VisualStudio.Component.VC.CoreBuildTools ^
|
||||
--add Microsoft.VisualStudio.Component.VC.CoreIde ^
|
||||
--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^
|
||||
--add Microsoft.VisualStudio.Component.VC.Tools.ARM64EC ^
|
||||
--add Microsoft.VisualStudio.Component.VC.Tools.ARM64 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64
|
||||
|
||||
echo exitcode = %errorlevel%
|
||||
if "%MSVC_VERSION%" == "latest" (
|
||||
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
|
||||
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.ASAN ^
|
||||
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
|
||||
--add Microsoft.VisualStudio.Component.VC.Tools.ARM64 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64
|
||||
) else if "%MSVC_VERSION%" == "14.40" (
|
||||
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
|
||||
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.ASAN ^
|
||||
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
|
||||
--add Microsoft.VisualStudio.Component.VC.14.40.17.10.ARM64 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.14.40.17.10.x86.x64
|
||||
) else if "%MSVC_VERSION%" == "14.36" (
|
||||
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
|
||||
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.ASAN ^
|
||||
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
|
||||
--add Microsoft.VisualStudio.Component.VC.14.36.17.6.ARM64 ^
|
||||
--add Microsoft.VisualStudio.Component.VC.14.36.17.6.x86.x64
|
||||
)
|
||||
|
||||
:: Check if installation was successful
|
||||
if %errorlevel% neq 0 (
|
||||
echo Failed to install Visual Studio Build Tools with C++ components.
|
||||
echo "Failed to install Visual Studio Build Tools with C++ components. (exitcode = %errorlevel%)"
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
|
||||
@ -6,25 +6,22 @@ echo Dependency Python installation started.
|
||||
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
|
||||
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%
|
||||
|
||||
if "%DESIRED_PYTHON%" == "3.13" (
|
||||
echo Python version is set to 3.13
|
||||
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.13.2/python-3.13.2-arm64.exe
|
||||
) else if "%DESIRED_PYTHON%" == "3.12" (
|
||||
echo Python version is set to 3.12
|
||||
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe
|
||||
) else if "%DESIRED_PYTHON%" == "3.11" (
|
||||
echo Python version is set to 3.11
|
||||
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.11.9/python-3.11.9-arm64.exe
|
||||
if "%PYTHON_VERSION%"=="Python312" (
|
||||
echo Python version is set to Python312
|
||||
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe"
|
||||
) else if "%PYTHON_VERSION%"=="Python311" (
|
||||
echo Python version is set to Python311
|
||||
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.11.9/python-3.11.9-arm64.exe"
|
||||
) else (
|
||||
echo DESIRED_PYTHON not defined, Python version is set to 3.12
|
||||
set DOWNLOAD_URL=https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe
|
||||
echo PYTHON_VERSION not defined, Python version is set to Python312
|
||||
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe"
|
||||
)
|
||||
|
||||
set INSTALLER_FILE=%DOWNLOADS_DIR%\python-installer.exe
|
||||
|
||||
:: Download installer
|
||||
echo Downloading Python...
|
||||
curl -L -o "%INSTALLER_FILE%" "%DOWNLOAD_URL%"
|
||||
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
|
||||
|
||||
:: Install Python
|
||||
echo Installing Python...
|
||||
|
||||
@ -14,7 +14,7 @@ where python
|
||||
:: install dependencies
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
pip install pytest numpy protobuf expecttest hypothesis
|
||||
pip install pytest numpy
|
||||
|
||||
:: find file name for pytorch wheel
|
||||
for /f "delims=" %%f in ('dir /b "%PYTORCH_FINAL_PACKAGE_DIR%" ^| findstr "torch-"') do set "TORCH_WHEEL_FILENAME=%PYTORCH_FINAL_PACKAGE_DIR%\%%f"
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
@echo off
|
||||
setlocal
|
||||
|
||||
set "ORIG_PATH=%PATH%"
|
||||
|
||||
if "%PACKAGE_TYPE%" == "wheel" goto wheel
|
||||
if "%PACKAGE_TYPE%" == "libtorch" goto libtorch
|
||||
|
||||
@ -8,7 +10,21 @@ echo "unknown package type"
|
||||
exit /b 1
|
||||
|
||||
:wheel
|
||||
call %PYTORCH_ROOT%\.ci\pytorch\windows\arm64\bootstrap_tests.bat
|
||||
echo "install wheel package"
|
||||
|
||||
echo Running pip install...
|
||||
pip install -q --pre numpy protobuf
|
||||
echo Error level after pip install: %ERRORLEVEL%
|
||||
if errorlevel 1 exit /b 1
|
||||
|
||||
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i"
|
||||
if errorlevel 1 exit /b 1
|
||||
|
||||
goto smoke_test
|
||||
|
||||
:smoke_test
|
||||
python -c "import torch"
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
echo Running python rnn_smoke.py...
|
||||
python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\rnn_smoke_win_arm64.py
|
||||
@ -23,12 +39,10 @@ goto end
|
||||
:libtorch
|
||||
echo "install and test libtorch"
|
||||
|
||||
if not exist tmp mkdir tmp
|
||||
|
||||
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do C:\Windows\System32\tar.exe -xf "%%i" -C tmp
|
||||
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do tar -xf "%%i" -C tmp
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
pushd tmp
|
||||
pushd tmp\libtorch
|
||||
|
||||
set VC_VERSION_LOWER=14
|
||||
set VC_VERSION_UPPER=36
|
||||
@ -46,4 +60,6 @@ if ERRORLEVEL 1 exit /b 1
|
||||
.\simple-torch-test.exe
|
||||
if ERRORLEVEL 1 exit /b 1
|
||||
|
||||
:end
|
||||
:end
|
||||
set "PATH=%ORIG_PATH%"
|
||||
popd
|
||||
@ -71,20 +71,11 @@ if "%DESIRED_PYTHON%" == "3.13" %PYTHON_EXEC% -m pip install --pre numpy==2.1.2
|
||||
if "%DESIRED_PYTHON%" == "3.12" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
|
||||
if "%DESIRED_PYTHON%" == "3.11" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
|
||||
if "%DESIRED_PYTHON%" == "3.10" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
|
||||
if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf networkx
|
||||
if "%DESIRED_PYTHON%" == "3.9" %PYTHON_EXEC% -m pip install --pre numpy==2.0.2 protobuf
|
||||
|
||||
if errorlevel 1 exit /b 1
|
||||
|
||||
if "%PYTORCH_BUILD_VERSION:dev=%" NEQ "%PYTORCH_BUILD_VERSION%" (
|
||||
set "CHANNEL=nightly"
|
||||
) else (
|
||||
set "CHANNEL=test"
|
||||
)
|
||||
|
||||
set "EXTRA_INDEX= "
|
||||
if "%CUDA_VERSION%" == "xpu" set "EXTRA_INDEX=--index-url https://download.pytorch.org/whl/%CHANNEL%/xpu"
|
||||
|
||||
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do %PYTHON_EXEC% -m pip install "%%i" %EXTRA_INDEX%
|
||||
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do %PYTHON_EXEC% -m pip install "%%i"
|
||||
if errorlevel 1 exit /b 1
|
||||
|
||||
goto smoke_test
|
||||
|
||||
@ -47,9 +47,9 @@ set XPU_EXTRA_INSTALLED=0
|
||||
set XPU_EXTRA_UNINSTALL=0
|
||||
|
||||
if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.0] (
|
||||
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d6d6c17-ca2d-4735-9331-99447e4a1280/intel-deep-learning-essentials-2025.0.1.28_offline.exe
|
||||
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/efc86abd-cb77-452e-a03f-a741895b8ece/intel-deep-learning-essentials-2025.0.0.336_offline.exe
|
||||
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
|
||||
set XPU_BUNDLE_VERSION=2025.0.1+20
|
||||
set XPU_BUNDLE_VERSION=2025.0.0+335
|
||||
set XPU_BUNDLE_INSTALLED=0
|
||||
set XPU_BUNDLE_UNINSTALL=0
|
||||
set XPU_EXTRA_URL=NULL
|
||||
|
||||
@ -31,9 +31,9 @@ fi
|
||||
export DOCKER_IMAGE=${DOCKER_IMAGE:-}
|
||||
if [[ -z "$DOCKER_IMAGE" ]]; then
|
||||
if [[ "$DESIRED_CUDA" == cpu ]]; then
|
||||
export DOCKER_IMAGE="pytorch/manylinux2_28:cpu"
|
||||
export DOCKER_IMAGE="pytorch/manylinux:cpu"
|
||||
else
|
||||
export DOCKER_IMAGE="pytorch/manylinux2_28-builder:${DESIRED_CUDA:2}"
|
||||
export DOCKER_IMAGE="pytorch/manylinux-builder:${DESIRED_CUDA:2}"
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -74,12 +74,6 @@ TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
|
||||
|
||||
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
|
||||
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
|
||||
# CUDA 12.8 builds have triton for Linux and Linux aarch64 binaries.
|
||||
if [[ "$DESIRED_CUDA" == cu128 ]]; then
|
||||
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
||||
fi
|
||||
|
||||
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" && ! "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
|
||||
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
|
||||
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
|
||||
@ -104,11 +98,11 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B
|
||||
fi
|
||||
|
||||
# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton xpu package
|
||||
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* ]]; then
|
||||
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}"
|
||||
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* && $(uname) == "Linux" ]]; then
|
||||
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
|
||||
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
|
||||
TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt)
|
||||
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}"
|
||||
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
|
||||
fi
|
||||
if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
|
||||
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}"
|
||||
|
||||
@ -55,16 +55,12 @@ s3_upload() {
|
||||
s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/"
|
||||
fi
|
||||
(
|
||||
cache_control_flag=""
|
||||
if [[ "${UPLOAD_CHANNEL}" = "test" ]]; then
|
||||
cache_control_flag="--cache-control='no-cache,no-store,must-revalidate'"
|
||||
fi
|
||||
for pkg in ${PKG_DIR}/*.${extension}; do
|
||||
(
|
||||
set -x
|
||||
shm_id=$(sha256sum "${pkg}" | awk '{print $1}')
|
||||
${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" \
|
||||
--metadata "checksum-sha256=${shm_id}" ${cache_control_flag}
|
||||
--metadata "checksum-sha256=${shm_id}"
|
||||
)
|
||||
done
|
||||
)
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux -o pipefail
|
||||
|
||||
source "${BINARY_ENV_FILE:-/c/w/env}"
|
||||
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"
|
||||
|
||||
export USE_SCCACHE=1
|
||||
export SCCACHE_IGNORE_SERVER_IO_ERROR=1
|
||||
|
||||
echo "Free space on filesystem before build:"
|
||||
df -h
|
||||
|
||||
export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
|
||||
|
||||
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
|
||||
pytorch/.ci/pytorch/windows/arm64/build_libtorch.bat
|
||||
elif [[ "$PACKAGE_TYPE" == 'wheel' ]]; then
|
||||
pytorch/.ci/pytorch/windows/arm64/build_pytorch.bat
|
||||
fi
|
||||
|
||||
echo "Free space on filesystem after build:"
|
||||
df -h
|
||||
@ -1,6 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -eux -o pipefail
|
||||
|
||||
source "${BINARY_ENV_FILE:-/c/w/env}"
|
||||
|
||||
pytorch/.ci/pytorch/windows/arm64/smoke_test.bat
|
||||
@ -13,7 +13,6 @@ export VC_YEAR=2022
|
||||
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
|
||||
export USE_SCCACHE=0
|
||||
export XPU_VERSION=2025.0
|
||||
export XPU_ENABLE_KINETO=1
|
||||
fi
|
||||
|
||||
echo "Free space on filesystem before build:"
|
||||
|
||||
@ -12,7 +12,6 @@ bugprone-*,
|
||||
-bugprone-macro-parentheses,
|
||||
-bugprone-lambda-function-name,
|
||||
-bugprone-reserved-identifier,
|
||||
-bugprone-return-const-ref-from-parameter,
|
||||
-bugprone-swapped-arguments,
|
||||
clang-analyzer-core.*,
|
||||
clang-analyzer-cplusplus.*,
|
||||
@ -25,7 +24,6 @@ cppcoreguidelines-*,
|
||||
-cppcoreguidelines-avoid-non-const-global-variables,
|
||||
-cppcoreguidelines-interfaces-global-init,
|
||||
-cppcoreguidelines-macro-usage,
|
||||
-cppcoreguidelines-macro-to-enum,
|
||||
-cppcoreguidelines-owning-memory,
|
||||
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
|
||||
-cppcoreguidelines-pro-bounds-constant-array-index,
|
||||
@ -57,7 +55,6 @@ modernize-*,
|
||||
-modernize-use-trailing-return-type,
|
||||
-modernize-use-nodiscard,
|
||||
performance-*,
|
||||
-performance-enum-size,
|
||||
readability-container-size-empty,
|
||||
readability-delete-null-pointer,
|
||||
readability-duplicate-include
|
||||
|
||||
1
.flake8
1
.flake8
@ -38,7 +38,6 @@ per-file-ignores =
|
||||
torchgen/api/types/__init__.py: F401,F403
|
||||
torchgen/executorch/api/types/__init__.py: F401,F403
|
||||
test/dynamo/test_higher_order_ops.py: B950
|
||||
test/dynamo/test_error_messages.py: B950
|
||||
torch/testing/_internal/dynamo_test_failures.py: B950
|
||||
# TOR901 is only for test, we want to ignore it for everything else.
|
||||
# It's not easy to configure this without affecting other per-file-ignores,
|
||||
|
||||
9
.github/actionlint.yaml
vendored
9
.github/actionlint.yaml
vendored
@ -1,13 +1,8 @@
|
||||
self-hosted-runner:
|
||||
labels:
|
||||
# GitHub hosted runner that actionlint doesn't recognize because actionlint version (1.6.21) is too old
|
||||
- ubuntu-24.04
|
||||
# GitHub hosted x86 Linux runners
|
||||
# TODO: Cleanup mentions of linux.20_04 when upgrade to linux.24_04 is complete
|
||||
- linux.20_04.4x
|
||||
- linux.20_04.16x
|
||||
- linux.24_04.4x
|
||||
- linux.24_04.16x
|
||||
# Organization-wide AWS Linux Runners
|
||||
- linux.large
|
||||
- linux.2xlarge
|
||||
@ -15,6 +10,7 @@ self-hosted-runner:
|
||||
- linux.9xlarge.ephemeral
|
||||
- am2.linux.9xlarge.ephemeral
|
||||
- linux.12xlarge
|
||||
- linux.12xlarge.ephemeral
|
||||
- linux.24xlarge
|
||||
- linux.24xlarge.ephemeral
|
||||
- linux.arm64.2xlarge
|
||||
@ -46,13 +42,10 @@ self-hosted-runner:
|
||||
- windows.8xlarge.nvidia.gpu
|
||||
- windows.8xlarge.nvidia.gpu.nonephemeral
|
||||
- windows.g5.4xlarge.nvidia.gpu
|
||||
# Windows ARM64 runners
|
||||
- windows-11-arm64
|
||||
# Organization-wide AMD hosted runners
|
||||
- linux.rocm.gpu
|
||||
- linux.rocm.gpu.2
|
||||
- linux.rocm.gpu.4
|
||||
- rocm-docker
|
||||
# Repo-specific Apple hosted runners
|
||||
- macos-m1-ultra
|
||||
- macos-m2-14
|
||||
|
||||
41
.github/actions/checkout-pytorch/action.yml
vendored
41
.github/actions/checkout-pytorch/action.yml
vendored
@ -23,44 +23,9 @@ runs:
|
||||
id: check_container_runner
|
||||
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Set up parallel fetch and clean workspace
|
||||
id: first-clean
|
||||
continue-on-error: true
|
||||
- name: Clean workspace
|
||||
shell: bash
|
||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
|
||||
env:
|
||||
NO_SUDO: ${{ inputs.no-sudo }}
|
||||
run: |
|
||||
# Use all available CPUs for fetching
|
||||
cd "${GITHUB_WORKSPACE}"
|
||||
git config --global fetch.parallel 0
|
||||
git config --global submodule.fetchJobs 0
|
||||
|
||||
# Clean workspace. The default checkout action should also do this, but
|
||||
# do it here as well just in case
|
||||
if [[ -d .git ]]; then
|
||||
if [ -z "${NO_SUDO}" ]; then
|
||||
sudo git clean -ffdx
|
||||
else
|
||||
git clean -ffdx
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Checkout PyTorch
|
||||
id: first-checkout-attempt
|
||||
continue-on-error: true
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# --depth=1 for speed, manually fetch history and other refs as necessary
|
||||
fetch-depth: ${{ inputs.fetch-depth }}
|
||||
submodules: ${{ inputs.submodules }}
|
||||
show-progress: false
|
||||
|
||||
- name: Clean workspace (try again)
|
||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' &&
|
||||
(steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success') }}
|
||||
shell: bash
|
||||
env:
|
||||
NO_SUDO: ${{ inputs.no-sudo }}
|
||||
run: |
|
||||
@ -75,11 +40,11 @@ runs:
|
||||
fi
|
||||
mkdir "${GITHUB_WORKSPACE}"
|
||||
|
||||
- name: Checkout PyTorch (try again)
|
||||
- name: Checkout PyTorch
|
||||
uses: actions/checkout@v4
|
||||
if: ${{ steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success' }}
|
||||
with:
|
||||
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
# --depth=1 for speed, manually fetch history and other refs as necessary
|
||||
fetch-depth: ${{ inputs.fetch-depth }}
|
||||
submodules: ${{ inputs.submodules }}
|
||||
show-progress: false
|
||||
|
||||
@ -15,6 +15,7 @@ runs:
|
||||
-e BINARY_ENV_FILE \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e DESIRED_CUDA \
|
||||
-e DESIRED_DEVTOOLSET \
|
||||
-e DESIRED_PYTHON \
|
||||
-e GITHUB_ACTIONS \
|
||||
-e GPU_ARCH_TYPE \
|
||||
|
||||
2
.github/ci_commit_pins/audio.txt
vendored
2
.github/ci_commit_pins/audio.txt
vendored
@ -1 +1 @@
|
||||
318bace01aebc1f82ae13d0d133fcf9fede73383
|
||||
f084f34bbb743fada85f66b0ed8041387565e69c
|
||||
|
||||
2
.github/labeler.yml
vendored
2
.github/labeler.yml
vendored
@ -98,7 +98,7 @@
|
||||
- test/distributed/**
|
||||
- torch/testing/_internal/distributed/**
|
||||
|
||||
"release notes: distributed (checkpoint)":
|
||||
"module: distributed_checkpoint":
|
||||
- torch/distributed/checkpoint/**
|
||||
- test/distributed/checkpoint/**
|
||||
|
||||
|
||||
3
.github/merge_rules.yaml
vendored
3
.github/merge_rules.yaml
vendored
@ -334,7 +334,6 @@
|
||||
- XiaobingSuper
|
||||
- jgong5
|
||||
- mingfeima
|
||||
- EikanWang
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
@ -367,7 +366,6 @@
|
||||
- jgong5
|
||||
- vfdev-5
|
||||
- leslie-fang-intel
|
||||
- EikanWang
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
@ -381,7 +379,6 @@
|
||||
approved_by:
|
||||
- leslie-fang-intel
|
||||
- jgong5
|
||||
- EikanWang
|
||||
mandatory_checks_name:
|
||||
- EasyCLA
|
||||
- Lint
|
||||
|
||||
2
.github/pytorch-probot.yml
vendored
2
.github/pytorch-probot.yml
vendored
@ -7,7 +7,6 @@ ciflow_push_tags:
|
||||
- ciflow/inductor
|
||||
- ciflow/inductor-periodic
|
||||
- ciflow/inductor-rocm
|
||||
- ciflow/inductor-perf-test-nightly-rocm
|
||||
- ciflow/inductor-perf-compare
|
||||
- ciflow/inductor-micro-benchmark
|
||||
- ciflow/inductor-micro-benchmark-cpu-x86
|
||||
@ -17,7 +16,6 @@ ciflow_push_tags:
|
||||
- ciflow/nightly
|
||||
- ciflow/periodic
|
||||
- ciflow/rocm
|
||||
- ciflow/rocm-mi300
|
||||
- ciflow/s390
|
||||
- ciflow/slow
|
||||
- ciflow/trunk
|
||||
|
||||
2
.github/requirements-gha-cache.txt
vendored
2
.github/requirements-gha-cache.txt
vendored
@ -5,7 +5,7 @@
|
||||
# functorch/docs/requirements.txt
|
||||
# .ci/docker/requirements-ci.txt
|
||||
boto3==1.35.42
|
||||
jinja2==3.1.6
|
||||
jinja2==3.1.5
|
||||
lintrunner==0.10.7
|
||||
ninja==1.10.0.post1
|
||||
nvidia-ml-py==11.525.84
|
||||
|
||||
2
.github/scripts/build_triton_wheel.py
vendored
2
.github/scripts/build_triton_wheel.py
vendored
@ -123,7 +123,7 @@ def main() -> None:
|
||||
parser = ArgumentParser("Build Triton binaries")
|
||||
parser.add_argument("--release", action="store_true")
|
||||
parser.add_argument(
|
||||
"--device", type=str, default="cuda", choices=["cuda", "rocm", "xpu", "aarch64"]
|
||||
"--device", type=str, default="cuda", choices=["cuda", "rocm", "xpu"]
|
||||
)
|
||||
parser.add_argument("--py-version", type=str)
|
||||
parser.add_argument("--commit-hash", type=str)
|
||||
|
||||
121
.github/scripts/generate_binary_build_matrix.py
vendored
121
.github/scripts/generate_binary_build_matrix.py
vendored
@ -16,15 +16,16 @@ from typing import Optional
|
||||
|
||||
|
||||
# NOTE: Also update the CUDA sources in tools/nightly.py when changing this list
|
||||
CUDA_ARCHES = ["11.8", "12.6", "12.8"]
|
||||
CUDA_STABLE = "12.6"
|
||||
CUDA_ARCHES = ["11.8", "12.4", "12.6", "12.8"]
|
||||
CUDA_ARCHES_FULL_VERSION = {
|
||||
"11.8": "11.8.0",
|
||||
"12.4": "12.4.1",
|
||||
"12.6": "12.6.3",
|
||||
"12.8": "12.8.0",
|
||||
}
|
||||
CUDA_ARCHES_CUDNN_VERSION = {
|
||||
"11.8": "9",
|
||||
"12.4": "9",
|
||||
"12.6": "9",
|
||||
"12.8": "9",
|
||||
}
|
||||
@ -34,11 +35,13 @@ ROCM_ARCHES = ["6.2.4", "6.3"]
|
||||
|
||||
XPU_ARCHES = ["xpu"]
|
||||
|
||||
CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
|
||||
|
||||
CPU_AARCH64_ARCH = ["cpu-aarch64"]
|
||||
|
||||
CPU_S390X_ARCH = ["cpu-s390x"]
|
||||
|
||||
CUDA_AARCH64_ARCHES = ["12.8-aarch64"]
|
||||
CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64"]
|
||||
|
||||
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
@ -55,6 +58,21 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
),
|
||||
"12.4": (
|
||||
"nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
),
|
||||
"12.6": (
|
||||
"nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
@ -66,7 +84,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
@ -75,30 +93,26 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
|
||||
"nvidia-cuda-nvrtc-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-runtime-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cuda-cupti-cu12==12.8.57; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu12==9.8.0.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cudnn-cu12==9.7.1.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cublas-cu12==12.8.3.14; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufft-cu12==11.3.3.41; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-curand-cu12==10.3.9.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusolver-cu12==11.7.2.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparse-cu12==12.5.7.53; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cusparselt-cu12==0.6.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.26.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nccl-cu12==2.25.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvtx-cu12==12.8.55; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-nvjitlink-cu12==12.8.61; platform_system == 'Linux' and platform_machine == 'x86_64' | "
|
||||
"nvidia-cufile-cu12==1.13.0.11; platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||
),
|
||||
"xpu": (
|
||||
"intel-cmplr-lib-rt==2025.0.4; platform_system == 'Linux' | "
|
||||
"intel-cmplr-lib-ur==2025.0.4; platform_system == 'Linux' | "
|
||||
"intel-cmplr-lic-rt==2025.0.4; platform_system == 'Linux' | "
|
||||
"intel-sycl-rt==2025.0.4; platform_system == 'Linux' | "
|
||||
"intel-cmplr-lib-rt==2025.0.5; platform_system == 'Windows' | "
|
||||
"intel-cmplr-lib-ur==2025.0.5; platform_system == 'Windows' | "
|
||||
"intel-cmplr-lic-rt==2025.0.5; platform_system == 'Windows' | "
|
||||
"intel-sycl-rt==2025.0.5; platform_system == 'Windows' | "
|
||||
"intel-cmplr-lib-rt==2025.0.2 | "
|
||||
"intel-cmplr-lib-ur==2025.0.2 | "
|
||||
"intel-cmplr-lic-rt==2025.0.2 | "
|
||||
"intel-sycl-rt==2025.0.2 | "
|
||||
"tcmlib==1.2.0 | "
|
||||
"umf==0.9.1 | "
|
||||
"intel-pti==0.10.1"
|
||||
"intel-pti==0.10.0"
|
||||
),
|
||||
}
|
||||
|
||||
@ -144,6 +158,8 @@ def arch_type(arch_version: str) -> str:
|
||||
return "rocm"
|
||||
elif arch_version in XPU_ARCHES:
|
||||
return "xpu"
|
||||
elif arch_version in CPU_CXX11_ABI_ARCH:
|
||||
return "cpu-cxx11-abi"
|
||||
elif arch_version in CPU_AARCH64_ARCH:
|
||||
return "cpu-aarch64"
|
||||
elif arch_version in CPU_S390X_ARCH:
|
||||
@ -172,23 +188,31 @@ WHEEL_CONTAINER_IMAGES = {
|
||||
},
|
||||
"xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}",
|
||||
"cpu": f"pytorch/manylinux2_28-builder:cpu-{DEFAULT_TAG}",
|
||||
"cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
|
||||
"cpu-aarch64": f"pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
|
||||
"cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
|
||||
}
|
||||
|
||||
CXX11_ABI = "cxx11-abi"
|
||||
RELEASE = "release"
|
||||
DEBUG = "debug"
|
||||
|
||||
LIBTORCH_CONTAINER_IMAGES: dict[str, str] = {
|
||||
LIBTORCH_CONTAINER_IMAGES: dict[tuple[str, str], str] = {
|
||||
**{
|
||||
gpu_arch: f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
|
||||
(
|
||||
gpu_arch,
|
||||
CXX11_ABI,
|
||||
): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in CUDA_ARCHES
|
||||
},
|
||||
**{
|
||||
gpu_arch: f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
|
||||
(
|
||||
gpu_arch,
|
||||
CXX11_ABI,
|
||||
): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
|
||||
for gpu_arch in ROCM_ARCHES
|
||||
},
|
||||
"cpu": f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
|
||||
("cpu", CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
|
||||
}
|
||||
|
||||
FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"]
|
||||
@ -198,6 +222,7 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
|
||||
return {
|
||||
"cpu": "cpu",
|
||||
"cpu-aarch64": "cpu",
|
||||
"cpu-cxx11-abi": "cpu-cxx11-abi",
|
||||
"cpu-s390x": "cpu",
|
||||
"cuda": f"cu{gpu_arch_version.replace('.', '')}",
|
||||
"cuda-aarch64": f"cu{gpu_arch_version.replace('-aarch64', '').replace('.', '')}",
|
||||
@ -212,7 +237,7 @@ def list_without(in_list: list[str], without: list[str]) -> list[str]:
|
||||
|
||||
def generate_libtorch_matrix(
|
||||
os: str,
|
||||
release_type: str,
|
||||
abi_version: str,
|
||||
arches: Optional[list[str]] = None,
|
||||
libtorch_variants: Optional[list[str]] = None,
|
||||
) -> list[dict[str, str]]:
|
||||
@ -221,8 +246,14 @@ def generate_libtorch_matrix(
|
||||
if os == "linux":
|
||||
arches += CUDA_ARCHES
|
||||
arches += ROCM_ARCHES
|
||||
# skip CUDA 12.8 builds for libtorch
|
||||
if "12.8" in arches:
|
||||
arches.remove("12.8")
|
||||
elif os == "windows":
|
||||
arches += CUDA_ARCHES
|
||||
# skip CUDA 12.8 builds on Windows
|
||||
if "12.8" in arches:
|
||||
arches.remove("12.8")
|
||||
if libtorch_variants is None:
|
||||
libtorch_variants = [
|
||||
"shared-with-deps",
|
||||
@ -234,6 +265,9 @@ def generate_libtorch_matrix(
|
||||
ret: list[dict[str, str]] = []
|
||||
for arch_version in arches:
|
||||
for libtorch_variant in libtorch_variants:
|
||||
# one of the values in the following list must be exactly
|
||||
# CXX11_ABI, but the precise value of the other one doesn't
|
||||
# matter
|
||||
gpu_arch_type = arch_type(arch_version)
|
||||
gpu_arch_version = "" if arch_version == "cpu" else arch_version
|
||||
# ROCm builds without-deps failed even in ROCm runners; skip for now
|
||||
@ -246,15 +280,16 @@ def generate_libtorch_matrix(
|
||||
"desired_cuda": translate_desired_cuda(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"libtorch_config": release_type,
|
||||
"libtorch_variant": libtorch_variant,
|
||||
"libtorch_config": abi_version if os == "windows" else "",
|
||||
"devtoolset": abi_version if os != "windows" else "",
|
||||
"container_image": (
|
||||
LIBTORCH_CONTAINER_IMAGES[arch_version]
|
||||
if os not in ("windows", "windows-arm64")
|
||||
LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)]
|
||||
if os != "windows"
|
||||
else ""
|
||||
),
|
||||
"package_type": "libtorch",
|
||||
"build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{release_type}".replace(
|
||||
"build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
|
||||
".", "_"
|
||||
),
|
||||
}
|
||||
@ -280,9 +315,12 @@ def generate_wheels_matrix(
|
||||
# Define default compute archivectures
|
||||
arches = ["cpu"]
|
||||
if os == "linux":
|
||||
arches += CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
|
||||
arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
|
||||
elif os == "windows":
|
||||
arches += CUDA_ARCHES + XPU_ARCHES
|
||||
# skip CUDA 12.8 builds on Windows until available
|
||||
if "12.8" in arches:
|
||||
arches.remove("12.8")
|
||||
elif os == "linux-aarch64":
|
||||
# Separate new if as the CPU type is different and
|
||||
# uses different build/test scripts
|
||||
@ -299,6 +337,7 @@ def generate_wheels_matrix(
|
||||
gpu_arch_version = (
|
||||
""
|
||||
if arch_version == "cpu"
|
||||
or arch_version == "cpu-cxx11-abi"
|
||||
or arch_version == "cpu-aarch64"
|
||||
or arch_version == "cpu-s390x"
|
||||
or arch_version == "xpu"
|
||||
@ -310,7 +349,7 @@ def generate_wheels_matrix(
|
||||
continue
|
||||
|
||||
if use_split_build and (
|
||||
arch_version not in ["12.6", "12.8", "11.8", "cpu"] or os != "linux"
|
||||
arch_version not in ["12.6", "12.4", "11.8", "cpu"] or os != "linux"
|
||||
):
|
||||
raise RuntimeError(
|
||||
"Split build is only supported on linux with cuda 12*, 11.8, and cpu.\n"
|
||||
@ -321,26 +360,26 @@ def generate_wheels_matrix(
|
||||
# cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
|
||||
|
||||
if (
|
||||
arch_version in ["12.8", "12.6", "11.8"]
|
||||
arch_version in ["12.8", "12.6", "12.4", "11.8"]
|
||||
and os == "linux"
|
||||
or arch_version in CUDA_AARCH64_ARCHES
|
||||
):
|
||||
desired_cuda = translate_desired_cuda(gpu_arch_type, gpu_arch_version)
|
||||
ret.append(
|
||||
{
|
||||
"python_version": python_version,
|
||||
"gpu_arch_type": gpu_arch_type,
|
||||
"gpu_arch_version": gpu_arch_version,
|
||||
"desired_cuda": desired_cuda,
|
||||
"desired_cuda": translate_desired_cuda(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"use_split_build": "True" if use_split_build else "False",
|
||||
"devtoolset": "cxx11-abi",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
|
||||
"package_type": package_type,
|
||||
"pytorch_extra_install_requirements": (
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS[
|
||||
f"{desired_cuda[2:4]}.{desired_cuda[4:]}" # for cuda-aarch64: cu126 -> 12.6
|
||||
]
|
||||
if os == "linux-aarch64"
|
||||
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]
|
||||
if os != "linux-aarch64"
|
||||
else ""
|
||||
),
|
||||
"build_name": (
|
||||
f"{package_type}-py{python_version}-{gpu_arch_type}"
|
||||
@ -350,8 +389,8 @@ def generate_wheels_matrix(
|
||||
), # include special case for aarch64 build, remove the -aarch64 postfix
|
||||
}
|
||||
)
|
||||
# Special build building to use on Colab. Python 3.11 for 12.6 CUDA
|
||||
if python_version == "3.11" and arch_version == CUDA_STABLE:
|
||||
# Special build building to use on Colab. Python 3.11 for 12.4 CUDA
|
||||
if python_version == "3.11" and arch_version == "12.4":
|
||||
ret.append(
|
||||
{
|
||||
"python_version": python_version,
|
||||
@ -361,6 +400,7 @@ def generate_wheels_matrix(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"use_split_build": "True" if use_split_build else "False",
|
||||
"devtoolset": "",
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
|
||||
"package_type": package_type,
|
||||
"pytorch_extra_install_requirements": "",
|
||||
@ -379,6 +419,12 @@ def generate_wheels_matrix(
|
||||
gpu_arch_type, gpu_arch_version
|
||||
),
|
||||
"use_split_build": "True" if use_split_build else "False",
|
||||
"devtoolset": (
|
||||
"cxx11-abi"
|
||||
if (arch_version in ["cpu-cxx11-abi", "cpu-aarch64"])
|
||||
or os == "linux"
|
||||
else ""
|
||||
),
|
||||
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
|
||||
"package_type": package_type,
|
||||
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
|
||||
@ -387,7 +433,7 @@ def generate_wheels_matrix(
|
||||
"pytorch_extra_install_requirements": (
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["xpu"]
|
||||
if gpu_arch_type == "xpu"
|
||||
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS[CUDA_STABLE]
|
||||
else PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.4"]
|
||||
if os != "linux"
|
||||
else ""
|
||||
),
|
||||
@ -399,4 +445,5 @@ def generate_wheels_matrix(
|
||||
|
||||
validate_nccl_dep_consistency("12.8")
|
||||
validate_nccl_dep_consistency("12.6")
|
||||
validate_nccl_dep_consistency("12.4")
|
||||
validate_nccl_dep_consistency("11.8")
|
||||
|
||||
72
.github/scripts/generate_ci_workflows.py
vendored
72
.github/scripts/generate_ci_workflows.py
vendored
@ -54,6 +54,7 @@ class BinaryBuildWorkflow:
|
||||
|
||||
# Optional fields
|
||||
build_environment: str = ""
|
||||
abi_version: str = ""
|
||||
ciflow_config: CIFlowConfig = field(default_factory=CIFlowConfig)
|
||||
is_scheduled: str = ""
|
||||
branches: str = "nightly"
|
||||
@ -63,7 +64,12 @@ class BinaryBuildWorkflow:
|
||||
use_split_build: bool = False
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
self.build_environment = f"{self.os}-binary-{self.package_type}"
|
||||
if self.abi_version:
|
||||
self.build_environment = (
|
||||
f"{self.os}-binary-{self.package_type}-{self.abi_version}"
|
||||
)
|
||||
else:
|
||||
self.build_environment = f"{self.os}-binary-{self.package_type}"
|
||||
if self.use_split_build:
|
||||
# added to distinguish concurrency groups
|
||||
self.build_environment += "-split"
|
||||
@ -90,7 +96,6 @@ class BinaryBuildWorkflow:
|
||||
class OperatingSystem:
|
||||
LINUX = "linux"
|
||||
WINDOWS = "windows"
|
||||
WINDOWS_ARM64 = "windows-arm64"
|
||||
MACOS = "macos"
|
||||
MACOS_ARM64 = "macos-arm64"
|
||||
LINUX_AARCH64 = "linux-aarch64"
|
||||
@ -127,9 +132,10 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.LINUX,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.CXX11_ABI,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
generate_binary_build_matrix.CXX11_ABI,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
@ -145,7 +151,7 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||
package_type="manywheel",
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
arches=["11.8", "12.6", "12.8"],
|
||||
arches=["11.8", "12.4", "12.6", "12.8"],
|
||||
python_versions=["3.9"],
|
||||
),
|
||||
branches="main",
|
||||
@ -169,9 +175,10 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.LINUX,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.CXX11_ABI,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.LINUX,
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
generate_binary_build_matrix.CXX11_ABI,
|
||||
arches=["cpu"],
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
),
|
||||
@ -194,6 +201,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.WINDOWS,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.RELEASE,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS,
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
@ -207,6 +215,7 @@ WINDOWS_BINARY_BUILD_WORKFLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.WINDOWS,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.DEBUG,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS,
|
||||
generate_binary_build_matrix.DEBUG,
|
||||
@ -223,6 +232,7 @@ WINDOWS_BINARY_SMOKE_WORKFLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.WINDOWS,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.RELEASE,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS,
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
@ -237,6 +247,7 @@ WINDOWS_BINARY_SMOKE_WORKFLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.WINDOWS,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.DEBUG,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS,
|
||||
generate_binary_build_matrix.DEBUG,
|
||||
@ -250,57 +261,14 @@ WINDOWS_BINARY_SMOKE_WORKFLOWS = [
|
||||
),
|
||||
]
|
||||
|
||||
WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.WINDOWS_ARM64,
|
||||
package_type="wheel",
|
||||
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
|
||||
OperatingSystem.WINDOWS_ARM64,
|
||||
arches=["cpu"],
|
||||
python_versions=["3.12"],
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
|
||||
isolated_workflow=True,
|
||||
),
|
||||
),
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.WINDOWS_ARM64,
|
||||
package_type="libtorch",
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS_ARM64,
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
arches=["cpu"],
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
isolated_workflow=True,
|
||||
),
|
||||
),
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.WINDOWS_ARM64,
|
||||
package_type="libtorch",
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.WINDOWS_ARM64,
|
||||
generate_binary_build_matrix.DEBUG,
|
||||
arches=["cpu"],
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
),
|
||||
ciflow_config=CIFlowConfig(
|
||||
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_LIBTORCH},
|
||||
isolated_workflow=True,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
MACOS_BINARY_BUILD_WORKFLOWS = [
|
||||
BinaryBuildWorkflow(
|
||||
os=OperatingSystem.MACOS_ARM64,
|
||||
package_type="libtorch",
|
||||
abi_version=generate_binary_build_matrix.CXX11_ABI,
|
||||
build_configs=generate_binary_build_matrix.generate_libtorch_matrix(
|
||||
OperatingSystem.MACOS,
|
||||
generate_binary_build_matrix.RELEASE,
|
||||
generate_binary_build_matrix.CXX11_ABI,
|
||||
libtorch_variants=["shared-with-deps"],
|
||||
),
|
||||
cross_compile_arm64=False,
|
||||
@ -387,10 +355,6 @@ def main() -> None:
|
||||
jinja_env.get_template("windows_binary_build_workflow.yml.j2"),
|
||||
WINDOWS_BINARY_SMOKE_WORKFLOWS,
|
||||
),
|
||||
(
|
||||
jinja_env.get_template("windows_arm64_binary_build_workflow.yml.j2"),
|
||||
WINDOWS_ARM64_BINARY_BUILD_WORKFLOWS,
|
||||
),
|
||||
(
|
||||
jinja_env.get_template("macos_binary_build_workflow.yml.j2"),
|
||||
MACOS_BINARY_BUILD_WORKFLOWS,
|
||||
|
||||
30
.github/scripts/get_ci_variable.py
vendored
30
.github/scripts/get_ci_variable.py
vendored
@ -1,30 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Helper script - Return CI variables such as stable cuda, min python version, etc."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
def main(args: list[str]) -> None:
|
||||
import generate_binary_build_matrix
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--cuda-stable-version",
|
||||
action="store_true",
|
||||
help="get cuda stable version",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--min-python-version",
|
||||
action="store_true",
|
||||
help="get min supported python version",
|
||||
)
|
||||
options = parser.parse_args(args)
|
||||
if options.cuda_stable_version:
|
||||
return print(generate_binary_build_matrix.CUDA_STABLE)
|
||||
if options.min_python_version:
|
||||
return print(generate_binary_build_matrix.FULL_PYTHON_VERSIONS[0])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv[1:])
|
||||
8
.github/scripts/github_utils.py
vendored
8
.github/scripts/github_utils.py
vendored
@ -57,10 +57,10 @@ def gh_fetch_url_and_headers(
|
||||
print(
|
||||
f"""{url}
|
||||
Rate limit exceeded:
|
||||
Used: {err.headers["X-RateLimit-Used"]}
|
||||
Limit: {err.headers["X-RateLimit-Limit"]}
|
||||
Remaining: {err.headers["X-RateLimit-Remaining"]}
|
||||
Resets at: {err.headers["x-RateLimit-Reset"]}"""
|
||||
Used: {err.headers['X-RateLimit-Used']}
|
||||
Limit: {err.headers['X-RateLimit-Limit']}
|
||||
Remaining: {err.headers['X-RateLimit-Remaining']}
|
||||
Resets at: {err.headers['x-RateLimit-Reset']}"""
|
||||
)
|
||||
else:
|
||||
print(f"Error fetching {url} {err}")
|
||||
|
||||
6
.github/scripts/label_utils.py
vendored
6
.github/scripts/label_utils.py
vendored
@ -63,9 +63,9 @@ def gh_get_labels(org: str, repo: str) -> list[str]:
|
||||
update_labels(labels, info)
|
||||
|
||||
last_page = get_last_page_num_from_header(header)
|
||||
assert last_page > 0, (
|
||||
"Error reading header info to determine total number of pages of labels"
|
||||
)
|
||||
assert (
|
||||
last_page > 0
|
||||
), "Error reading header info to determine total number of pages of labels"
|
||||
for page_number in range(2, last_page + 1): # skip page 1
|
||||
_, info = request_for_labels(prefix + f"&page={page_number}")
|
||||
update_labels(labels, info)
|
||||
|
||||
5
.github/scripts/lintrunner.sh
vendored
5
.github/scripts/lintrunner.sh
vendored
@ -1,6 +1,11 @@
|
||||
#!/usr/bin/env bash
|
||||
set -ex
|
||||
|
||||
# The generic Linux job chooses to use base env, not the one setup by the image
|
||||
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
|
||||
eval "$(command conda 'shell.bash' 'hook' 2> /dev/null)"
|
||||
conda activate "${CONDA_ENV}"
|
||||
|
||||
# Use uv to speed up lintrunner init
|
||||
python3 -m pip install uv==0.1.45
|
||||
|
||||
|
||||
2
.github/scripts/pytest_caching_utils.py
vendored
2
.github/scripts/pytest_caching_utils.py
vendored
@ -33,7 +33,7 @@ class PRIdentifier(str):
|
||||
__slots__ = ()
|
||||
|
||||
def __new__(cls, value: str) -> "PRIdentifier":
|
||||
md5 = hashlib.md5(value.encode("utf-8"), usedforsecurity=False).hexdigest()
|
||||
md5 = hashlib.md5(value.encode("utf-8")).hexdigest()
|
||||
return super().__new__(cls, md5)
|
||||
|
||||
|
||||
|
||||
@ -5,50 +5,6 @@ FROM --platform=linux/amd64 docker.io/ubuntu:24.04 as ld-prefix
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update && apt-get -y install ca-certificates libicu74 libssl3
|
||||
|
||||
# Patched podman
|
||||
FROM --platform=linux/s390x docker.io/ubuntu:24.04 as podman
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
RUN sed -i 's/^Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources
|
||||
RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
cmake \
|
||||
curl \
|
||||
devscripts \
|
||||
dpkg-dev \
|
||||
gdb \
|
||||
less \
|
||||
make \
|
||||
python3 \
|
||||
python3-pip \
|
||||
quilt \
|
||||
rsync \
|
||||
software-properties-common \
|
||||
stress-ng \
|
||||
vim \
|
||||
nano \
|
||||
wget && \
|
||||
apt-get build-dep -y podman && \
|
||||
apt-get source podman
|
||||
|
||||
COPY podman-patches/podman-25245.patch /tmp/podman-25245.patch
|
||||
COPY podman-patches/podman-25102-backport.patch /tmp/podman-25102-backport.patch
|
||||
|
||||
# import and apply patches
|
||||
# patches:
|
||||
# https://github.com/containers/podman/pull/25102
|
||||
# https://github.com/containers/podman/pull/25245
|
||||
RUN cd /libpod-* && \
|
||||
quilt import /tmp/podman-25245.patch && quilt push && \
|
||||
quilt import /tmp/podman-25102-backport.patch && quilt push && \
|
||||
dch -i "Fix podman deadlock and add option to clean up build leftovers" && \
|
||||
/bin/rm /tmp/podman-25245.patch /tmp/podman-25102-backport.patch
|
||||
|
||||
# build patched podman
|
||||
RUN cd /libpod-* && \
|
||||
debuild -i -us -uc -b && \
|
||||
/bin/rm /podman-remote_*.deb && \
|
||||
mkdir /tmp/podman && cp -v /podman*.deb /tmp/podman
|
||||
|
||||
# Main image.
|
||||
FROM --platform=linux/s390x docker.io/ubuntu:24.04
|
||||
|
||||
@ -89,11 +45,7 @@ COPY fs/ /
|
||||
RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint
|
||||
|
||||
# install podman
|
||||
# RUN apt-get update && apt -y install podman podman-docker
|
||||
|
||||
# install patched podman
|
||||
COPY --from=podman /tmp/podman /tmp/podman
|
||||
RUN apt-get update && apt -y install /tmp/podman/*.deb && /bin/rm -rfv /tmp/podman
|
||||
RUN apt -y install podman podman-docker
|
||||
|
||||
# amd64 Github Actions Runner.
|
||||
RUN useradd -m actions-runner
|
||||
@ -113,7 +65,7 @@ RUN virtualenv --system-site-packages venv
|
||||
#
|
||||
COPY --chown=actions-runner:actions-runner manywheel-s390x.tar /home/actions-runner/manywheel-s390x.tar
|
||||
|
||||
RUN curl -L https://github.com/actions/runner/releases/download/v2.322.0/actions-runner-linux-x64-2.322.0.tar.gz | tar -xz
|
||||
RUN curl -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz | tar -xz
|
||||
|
||||
ENTRYPOINT ["/usr/bin/entrypoint"]
|
||||
CMD ["/usr/bin/actions-runner"]
|
||||
|
||||
@ -1,358 +0,0 @@
|
||||
diff --git a/cmd/podman/system/prune.go b/cmd/podman/system/prune.go
|
||||
index f7cf7b551..739f87cde 100644
|
||||
--- a/cmd/podman/system/prune.go
|
||||
+++ b/cmd/podman/system/prune.go
|
||||
@@ -48,6 +48,7 @@ func init() {
|
||||
flags.BoolVarP(&force, "force", "f", false, "Do not prompt for confirmation. The default is false")
|
||||
flags.BoolVarP(&pruneOptions.All, "all", "a", false, "Remove all unused data")
|
||||
flags.BoolVar(&pruneOptions.External, "external", false, "Remove container data in storage not controlled by podman")
|
||||
+ flags.BoolVar(&pruneOptions.Build, "build", false, "Remove build containers")
|
||||
flags.BoolVar(&pruneOptions.Volume, "volumes", false, "Prune volumes")
|
||||
filterFlagName := "filter"
|
||||
flags.StringArrayVar(&filters, filterFlagName, []string{}, "Provide filter values (e.g. 'label=<key>=<value>')")
|
||||
@@ -64,8 +65,12 @@ func prune(cmd *cobra.Command, args []string) error {
|
||||
volumeString = `
|
||||
- all volumes not used by at least one container`
|
||||
}
|
||||
-
|
||||
- fmt.Printf(createPruneWarningMessage(pruneOptions), volumeString, "Are you sure you want to continue? [y/N] ")
|
||||
+ buildString := ""
|
||||
+ if pruneOptions.Build {
|
||||
+ buildString = `
|
||||
+ - all build containers`
|
||||
+ }
|
||||
+ fmt.Printf(createPruneWarningMessage(pruneOptions), volumeString, buildString, "Are you sure you want to continue? [y/N] ")
|
||||
|
||||
answer, err := reader.ReadString('\n')
|
||||
if err != nil {
|
||||
@@ -124,7 +129,7 @@ func createPruneWarningMessage(pruneOpts entities.SystemPruneOptions) string {
|
||||
if pruneOpts.All {
|
||||
return `WARNING! This command removes:
|
||||
- all stopped containers
|
||||
- - all networks not used by at least one container%s
|
||||
+ - all networks not used by at least one container%s%s
|
||||
- all images without at least one container associated with them
|
||||
- all build cache
|
||||
|
||||
@@ -132,7 +137,7 @@ func createPruneWarningMessage(pruneOpts entities.SystemPruneOptions) string {
|
||||
}
|
||||
return `WARNING! This command removes:
|
||||
- all stopped containers
|
||||
- - all networks not used by at least one container%s
|
||||
+ - all networks not used by at least one container%s%s
|
||||
- all dangling images
|
||||
- all dangling build cache
|
||||
|
||||
diff --git a/docs/source/markdown/podman-system-prune.1.md b/docs/source/markdown/podman-system-prune.1.md
|
||||
index 52f9ec1c7..95099d018 100644
|
||||
--- a/docs/source/markdown/podman-system-prune.1.md
|
||||
+++ b/docs/source/markdown/podman-system-prune.1.md
|
||||
@@ -7,20 +7,28 @@ podman\-system\-prune - Remove all unused pods, containers, images, networks, an
|
||||
**podman system prune** [*options*]
|
||||
|
||||
## DESCRIPTION
|
||||
-**podman system prune** removes all unused containers (both dangling and unreferenced), pods, networks, and optionally, volumes from local storage.
|
||||
+**podman system prune** removes all unused containers (both dangling and unreferenced), build containers, pods, networks, and optionally, volumes from local storage.
|
||||
|
||||
Use the **--all** option to delete all unused images. Unused images are dangling images as well as any image that does not have any containers based on it.
|
||||
|
||||
By default, volumes are not removed to prevent important data from being deleted if there is currently no container using the volume. Use the **--volumes** flag when running the command to prune volumes as well.
|
||||
|
||||
+By default, build containers are not removed to prevent interference with builds in progress. Use the **--build** flag when running the command to remove build containers as well.
|
||||
+
|
||||
## OPTIONS
|
||||
#### **--all**, **-a**
|
||||
|
||||
Recursively remove all unused pods, containers, images, networks, and volume data. (Maximum 50 iterations.)
|
||||
|
||||
+#### **--build**
|
||||
+
|
||||
+Removes any build containers that were created during the build, but were not removed because the build was unexpectedly terminated.
|
||||
+
|
||||
+Note: **This is not safe operation and should be executed only when no builds are in progress. It can interfere with builds in progress.**
|
||||
+
|
||||
#### **--external**
|
||||
|
||||
-Removes all leftover container storage files from local storage not managed by Podman. In normal circumstances, no such data exists, but in case of an unclean shutdown, the Podman database may be corrupted and cause this.
|
||||
+Tries to clean up remainders of previous containers or layers that are not references in the storage json files. These can happen in the case of unclean shutdowns or regular restarts in transient storage mode.
|
||||
|
||||
However, when using transient storage mode, the Podman database does not persist. This means containers leave the writable layers on disk after a reboot. When using a transient store, it is recommended that the **podman system prune --external** command is run during boot.
|
||||
|
||||
diff --git a/libpod/runtime.go b/libpod/runtime.go
|
||||
index 986e40f60..609fbba57 100644
|
||||
--- a/libpod/runtime.go
|
||||
+++ b/libpod/runtime.go
|
||||
@@ -33,6 +33,7 @@ import (
|
||||
"github.com/containers/podman/v4/libpod/lock"
|
||||
"github.com/containers/podman/v4/libpod/plugin"
|
||||
"github.com/containers/podman/v4/libpod/shutdown"
|
||||
+ "github.com/containers/podman/v4/pkg/domain/entities/reports"
|
||||
"github.com/containers/podman/v4/pkg/rootless"
|
||||
"github.com/containers/podman/v4/pkg/systemd"
|
||||
"github.com/containers/podman/v4/pkg/util"
|
||||
@@ -1250,3 +1251,52 @@ func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) {
|
||||
|
||||
return toReturn, locksHeld, nil
|
||||
}
|
||||
+
|
||||
+// Exists checks whether a file or directory exists at the given path.
|
||||
+// If the path is a symlink, the symlink is followed.
|
||||
+func Exists(path string) error {
|
||||
+ // It uses unix.Faccessat which is a faster operation compared to os.Stat for
|
||||
+ // simply checking the existence of a file.
|
||||
+ err := unix.Faccessat(unix.AT_FDCWD, path, unix.F_OK, 0)
|
||||
+ if err != nil {
|
||||
+ return &os.PathError{Op: "faccessat", Path: path, Err: err}
|
||||
+ }
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// PruneBuildContainers removes any build containers that were created during the build,
|
||||
+// but were not removed because the build was unexpectedly terminated.
|
||||
+//
|
||||
+// Note: This is not a safe operation and should be executed only when no builds are in progress. It can interfere with builds in progress.
|
||||
+func (r *Runtime) PruneBuildContainers() ([]*reports.PruneReport, error) {
|
||||
+ stageContainersPruneReports := []*reports.PruneReport{}
|
||||
+
|
||||
+ containers, err := r.store.Containers()
|
||||
+ if err != nil {
|
||||
+ return stageContainersPruneReports, err
|
||||
+ }
|
||||
+ for _, container := range containers {
|
||||
+ path, err := r.store.ContainerDirectory(container.ID)
|
||||
+ if err != nil {
|
||||
+ return stageContainersPruneReports, err
|
||||
+ }
|
||||
+ if err := Exists(filepath.Join(path, "buildah.json")); err != nil {
|
||||
+ continue
|
||||
+ }
|
||||
+
|
||||
+ report := &reports.PruneReport{
|
||||
+ Id: container.ID,
|
||||
+ }
|
||||
+ size, err := r.store.ContainerSize(container.ID)
|
||||
+ if err != nil {
|
||||
+ report.Err = err
|
||||
+ }
|
||||
+ report.Size = uint64(size)
|
||||
+
|
||||
+ if err := r.store.DeleteContainer(container.ID); err != nil {
|
||||
+ report.Err = errors.Join(report.Err, err)
|
||||
+ }
|
||||
+ stageContainersPruneReports = append(stageContainersPruneReports, report)
|
||||
+ }
|
||||
+ return stageContainersPruneReports, nil
|
||||
+}
|
||||
diff --git a/pkg/api/handlers/libpod/system.go b/pkg/api/handlers/libpod/system.go
|
||||
index 70d4493f8..7c129b1ba 100644
|
||||
--- a/pkg/api/handlers/libpod/system.go
|
||||
+++ b/pkg/api/handlers/libpod/system.go
|
||||
@@ -22,6 +22,7 @@ func SystemPrune(w http.ResponseWriter, r *http.Request) {
|
||||
All bool `schema:"all"`
|
||||
Volumes bool `schema:"volumes"`
|
||||
External bool `schema:"external"`
|
||||
+ Build bool `schema:"build"`
|
||||
}{}
|
||||
|
||||
if err := decoder.Decode(&query, r.URL.Query()); err != nil {
|
||||
@@ -43,6 +44,7 @@ func SystemPrune(w http.ResponseWriter, r *http.Request) {
|
||||
Volume: query.Volumes,
|
||||
Filters: *filterMap,
|
||||
External: query.External,
|
||||
+ Build: query.Build,
|
||||
}
|
||||
report, err := containerEngine.SystemPrune(r.Context(), pruneOptions)
|
||||
if err != nil {
|
||||
diff --git a/pkg/bindings/system/types.go b/pkg/bindings/system/types.go
|
||||
index 89e093f68..b4a4ff064 100644
|
||||
--- a/pkg/bindings/system/types.go
|
||||
+++ b/pkg/bindings/system/types.go
|
||||
@@ -18,6 +18,7 @@ type PruneOptions struct {
|
||||
Filters map[string][]string
|
||||
Volumes *bool
|
||||
External *bool
|
||||
+ Build *bool
|
||||
}
|
||||
|
||||
// VersionOptions are optional options for getting version info
|
||||
diff --git a/pkg/bindings/system/types_prune_options.go b/pkg/bindings/system/types_prune_options.go
|
||||
index d00498520..5f3bd652c 100644
|
||||
--- a/pkg/bindings/system/types_prune_options.go
|
||||
+++ b/pkg/bindings/system/types_prune_options.go
|
||||
@@ -76,3 +76,18 @@ func (o *PruneOptions) GetExternal() bool {
|
||||
}
|
||||
return *o.External
|
||||
}
|
||||
+
|
||||
+// WithBuild set field Build to given value
|
||||
+func (o *PruneOptions) WithBuild(value bool) *PruneOptions {
|
||||
+ o.Build = &value
|
||||
+ return o
|
||||
+}
|
||||
+
|
||||
+// GetBuild returns value of field Build
|
||||
+func (o *PruneOptions) GetBuild() bool {
|
||||
+ if o.Build == nil {
|
||||
+ var z bool
|
||||
+ return z
|
||||
+ }
|
||||
+ return *o.Build
|
||||
+}
|
||||
diff --git a/pkg/domain/entities/system.go b/pkg/domain/entities/system.go
|
||||
index 473db3530..f6938652a 100644
|
||||
--- a/pkg/domain/entities/system.go
|
||||
+++ b/pkg/domain/entities/system.go
|
||||
@@ -22,6 +22,7 @@ type SystemPruneOptions struct {
|
||||
Volume bool
|
||||
Filters map[string][]string `json:"filters" schema:"filters"`
|
||||
External bool
|
||||
+ Build bool
|
||||
}
|
||||
|
||||
// SystemPruneReport provides report after system prune is executed.
|
||||
diff --git a/pkg/domain/infra/abi/system.go b/pkg/domain/infra/abi/system.go
|
||||
index 24ee64d29..ea3e5f203 100644
|
||||
--- a/pkg/domain/infra/abi/system.go
|
||||
+++ b/pkg/domain/infra/abi/system.go
|
||||
@@ -150,16 +150,16 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
|
||||
return nil
|
||||
}
|
||||
|
||||
-// SystemPrune removes unused data from the system. Pruning pods, containers, networks, volumes and images.
|
||||
+// SystemPrune removes unused data from the system. Pruning pods, containers, build containers, networks, volumes and images.
|
||||
func (ic *ContainerEngine) SystemPrune(ctx context.Context, options entities.SystemPruneOptions) (*entities.SystemPruneReport, error) {
|
||||
var systemPruneReport = new(entities.SystemPruneReport)
|
||||
|
||||
if options.External {
|
||||
- if options.All || options.Volume || len(options.Filters) > 0 {
|
||||
+ if options.All || options.Volume || len(options.Filters) > 0 || options.Build {
|
||||
return nil, fmt.Errorf("system prune --external cannot be combined with other options")
|
||||
}
|
||||
- err := ic.Libpod.GarbageCollect()
|
||||
- if err != nil {
|
||||
+
|
||||
+ if err := ic.Libpod.GarbageCollect(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return systemPruneReport, nil
|
||||
@@ -170,6 +170,17 @@ func (ic *ContainerEngine) SystemPrune(ctx context.Context, options entities.Sys
|
||||
filters = append(filters, fmt.Sprintf("%s=%s", k, v[0]))
|
||||
}
|
||||
reclaimedSpace := (uint64)(0)
|
||||
+
|
||||
+ // Prune Build Containers
|
||||
+ if options.Build {
|
||||
+ stageContainersPruneReports, err := ic.Libpod.PruneBuildContainers()
|
||||
+ if err != nil {
|
||||
+ return nil, err
|
||||
+ }
|
||||
+ reclaimedSpace += reports.PruneReportsSize(stageContainersPruneReports)
|
||||
+ systemPruneReport.ContainerPruneReports = append(systemPruneReport.ContainerPruneReports, stageContainersPruneReports...)
|
||||
+ }
|
||||
+
|
||||
found := true
|
||||
for found {
|
||||
found = false
|
||||
diff --git a/pkg/domain/infra/tunnel/system.go b/pkg/domain/infra/tunnel/system.go
|
||||
index fc82e7b2b..142a9fa5c 100644
|
||||
--- a/pkg/domain/infra/tunnel/system.go
|
||||
+++ b/pkg/domain/infra/tunnel/system.go
|
||||
@@ -19,7 +19,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
|
||||
|
||||
// SystemPrune prunes unused data from the system.
|
||||
func (ic *ContainerEngine) SystemPrune(ctx context.Context, opts entities.SystemPruneOptions) (*entities.SystemPruneReport, error) {
|
||||
- options := new(system.PruneOptions).WithAll(opts.All).WithVolumes(opts.Volume).WithFilters(opts.Filters).WithExternal(opts.External)
|
||||
+ options := new(system.PruneOptions).WithAll(opts.All).WithVolumes(opts.Volume).WithFilters(opts.Filters).WithExternal(opts.External).WithBuild(opts.Build)
|
||||
return system.Prune(ic.ClientCtx, options)
|
||||
}
|
||||
|
||||
diff --git a/test/e2e/prune_test.go b/test/e2e/prune_test.go
|
||||
index 01e848478..57bd5582d 100644
|
||||
--- a/test/e2e/prune_test.go
|
||||
+++ b/test/e2e/prune_test.go
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
+ "syscall"
|
||||
+ "time"
|
||||
|
||||
. "github.com/containers/podman/v4/test/utils"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
@@ -22,6 +24,11 @@ FROM scratch
|
||||
ENV test1=test1
|
||||
ENV test2=test2`
|
||||
|
||||
+var longBuildImage = fmt.Sprintf(`
|
||||
+FROM %s
|
||||
+RUN echo "Hello, World!"
|
||||
+RUN RUN echo "Please use signal 9 this will never ends" && sleep 10000s`, ALPINE)
|
||||
+
|
||||
var _ = Describe("Podman prune", func() {
|
||||
|
||||
It("podman container prune containers", func() {
|
||||
@@ -593,4 +600,63 @@ var _ = Describe("Podman prune", func() {
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(dirents).To(HaveLen(3))
|
||||
})
|
||||
+
|
||||
+ It("podman system prune --build clean up after terminated build", func() {
|
||||
+ useCustomNetworkDir(podmanTest, tempdir)
|
||||
+
|
||||
+ podmanTest.BuildImage(pruneImage, "alpine_notleaker:latest", "false")
|
||||
+
|
||||
+ create := podmanTest.Podman([]string{"create", "--name", "test", BB, "sleep", "10000"})
|
||||
+ create.WaitWithDefaultTimeout()
|
||||
+ Expect(create).Should(ExitCleanly())
|
||||
+
|
||||
+ containerFilePath := filepath.Join(podmanTest.TempDir, "ContainerFile-podman-leaker")
|
||||
+ err := os.WriteFile(containerFilePath, []byte(longBuildImage), 0755)
|
||||
+ Expect(err).ToNot(HaveOccurred())
|
||||
+
|
||||
+ build := podmanTest.Podman([]string{"build", "-f", containerFilePath, "-t", "podmanleaker"})
|
||||
+ // Build will never finish so let's wait for build to ask for SIGKILL to simulate a failed build that leaves stage containers.
|
||||
+ matchedOutput := false
|
||||
+ for range 900 {
|
||||
+ if build.LineInOutputContains("Please use signal 9") {
|
||||
+ matchedOutput = true
|
||||
+ build.Signal(syscall.SIGKILL)
|
||||
+ break
|
||||
+ }
|
||||
+ time.Sleep(100 * time.Millisecond)
|
||||
+ }
|
||||
+ if !matchedOutput {
|
||||
+ Fail("Did not match special string in podman build")
|
||||
+ }
|
||||
+
|
||||
+ // Check Intermediate image of stage container
|
||||
+ none := podmanTest.Podman([]string{"images", "-a"})
|
||||
+ none.WaitWithDefaultTimeout()
|
||||
+ Expect(none).Should(ExitCleanly())
|
||||
+ Expect(none.OutputToString()).Should(ContainSubstring("none"))
|
||||
+
|
||||
+ // Check if Container and Stage Container exist
|
||||
+ count := podmanTest.Podman([]string{"ps", "-aq", "--external"})
|
||||
+ count.WaitWithDefaultTimeout()
|
||||
+ Expect(count).Should(ExitCleanly())
|
||||
+ Expect(count.OutputToStringArray()).To(HaveLen(3))
|
||||
+
|
||||
+ prune := podmanTest.Podman([]string{"system", "prune", "--build", "-f"})
|
||||
+ prune.WaitWithDefaultTimeout()
|
||||
+ Expect(prune).Should(ExitCleanly())
|
||||
+
|
||||
+ // Container should still exist, but no stage containers
|
||||
+ count = podmanTest.Podman([]string{"ps", "-aq", "--external"})
|
||||
+ count.WaitWithDefaultTimeout()
|
||||
+ Expect(count).Should(ExitCleanly())
|
||||
+ Expect(count.OutputToString()).To(BeEmpty())
|
||||
+
|
||||
+ Expect(podmanTest.NumberOfContainers()).To(Equal(0))
|
||||
+
|
||||
+ after := podmanTest.Podman([]string{"images", "-a"})
|
||||
+ after.WaitWithDefaultTimeout()
|
||||
+ Expect(after).Should(ExitCleanly())
|
||||
+ Expect(after.OutputToString()).ShouldNot(ContainSubstring("none"))
|
||||
+ Expect(after.OutputToString()).Should(ContainSubstring("notleaker"))
|
||||
+ })
|
||||
})
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c
|
||||
index 4f71d49e5c..3d74af6a6c 100644
|
||||
--- a/pkg/rootless/rootless_linux.c
|
||||
+++ b/pkg/rootless/rootless_linux.c
|
||||
@@ -658,7 +658,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
|
||||
if (pipe (p) < 0)
|
||||
return -1;
|
||||
|
||||
- pid = fork ();
|
||||
+ pid = syscall_clone (SIGCHLD, NULL);
|
||||
if (pid < 0)
|
||||
{
|
||||
close (p[0]);
|
||||
@@ -689,7 +689,7 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
|
||||
close (p[0]);
|
||||
|
||||
setsid ();
|
||||
- pid = fork ();
|
||||
+ pid = syscall_clone (SIGCHLD, NULL);
|
||||
if (pid < 0)
|
||||
_exit (EXIT_FAILURE);
|
||||
97
.github/scripts/s390x-ci/tests_list.py
vendored
97
.github/scripts/s390x-ci/tests_list.py
vendored
@ -1,97 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
sys.path.insert(1, os.path.join(sys.path[0], "..", "..", ".."))
|
||||
|
||||
from tools.testing.discover_tests import TESTS
|
||||
|
||||
|
||||
skip_list = [
|
||||
# these tests fail due to various reasons
|
||||
"dynamo/test_misc",
|
||||
"inductor/test_aot_inductor",
|
||||
"inductor/test_cpu_repro",
|
||||
"inductor/test_cpu_select_algorithm",
|
||||
"inductor/test_aot_inductor_arrayref",
|
||||
"inductor/test_torchinductor_codegen_dynamic_shapes",
|
||||
"lazy/test_meta_kernel",
|
||||
"onnx/test_utility_funs",
|
||||
"profiler/test_profiler",
|
||||
"test_ao_sparsity",
|
||||
"test_cpp_extensions_open_device_registration",
|
||||
"test_jit",
|
||||
"test_metal",
|
||||
"test_mps",
|
||||
"dynamo/test_torchrec",
|
||||
"inductor/test_aot_inductor_utils",
|
||||
"inductor/test_coordinate_descent_tuner",
|
||||
"test_jiterator",
|
||||
# these tests run long and fail in addition to that
|
||||
"dynamo/test_dynamic_shapes",
|
||||
"test_quantization",
|
||||
"inductor/test_torchinductor",
|
||||
"inductor/test_torchinductor_dynamic_shapes",
|
||||
"inductor/test_torchinductor_opinfo",
|
||||
"test_binary_ufuncs",
|
||||
"test_unary_ufuncs",
|
||||
# these tests fail when cuda is not available
|
||||
"inductor/test_cudacodecache",
|
||||
"inductor/test_inductor_utils",
|
||||
"inductor/test_inplacing_pass",
|
||||
"inductor/test_kernel_benchmark",
|
||||
"inductor/test_max_autotune",
|
||||
"inductor/test_move_constructors_to_cuda",
|
||||
"inductor/test_multi_kernel",
|
||||
"inductor/test_pattern_matcher",
|
||||
"inductor/test_perf",
|
||||
"inductor/test_select_algorithm",
|
||||
"inductor/test_snode_runtime",
|
||||
"inductor/test_triton_wrapper",
|
||||
# these tests fail when mkldnn is not available
|
||||
"inductor/test_custom_post_grad_passes",
|
||||
"inductor/test_mkldnn_pattern_matcher",
|
||||
# lacks quantization support
|
||||
"onnx/test_models_quantized_onnxruntime",
|
||||
"onnx/test_pytorch_onnx_onnxruntime",
|
||||
# https://github.com/pytorch/pytorch/issues/102078
|
||||
"test_decomp",
|
||||
# https://github.com/pytorch/pytorch/issues/146698
|
||||
"test_model_exports_to_core_aten",
|
||||
# runs very long, skip for now
|
||||
"inductor/test_layout_optim",
|
||||
"test_fx",
|
||||
# some false errors
|
||||
"doctests",
|
||||
]
|
||||
|
||||
skip_list_regex = [
|
||||
# distributed tests fail randomly
|
||||
"distributed/.*",
|
||||
]
|
||||
|
||||
all_testfiles = sorted(TESTS)
|
||||
|
||||
filtered_testfiles = []
|
||||
|
||||
for filename in all_testfiles:
|
||||
if filename in skip_list:
|
||||
continue
|
||||
|
||||
regex_filtered = False
|
||||
|
||||
for regex_string in skip_list_regex:
|
||||
if re.fullmatch(regex_string, filename):
|
||||
regex_filtered = True
|
||||
break
|
||||
|
||||
if regex_filtered:
|
||||
continue
|
||||
|
||||
filtered_testfiles.append(filename)
|
||||
|
||||
for filename in filtered_testfiles:
|
||||
print(' "' + filename + '",')
|
||||
74
.github/scripts/trymerge.py
vendored
74
.github/scripts/trymerge.py
vendored
@ -485,7 +485,7 @@ def get_check_run_name_prefix(workflow_run: Any) -> str:
|
||||
if workflow_run is None:
|
||||
return ""
|
||||
else:
|
||||
return f"{workflow_run['workflow']['name']} / "
|
||||
return f'{workflow_run["workflow"]["name"]} / '
|
||||
|
||||
|
||||
def is_passing_status(status: Optional[str]) -> bool:
|
||||
@ -545,7 +545,7 @@ def add_workflow_conclusions(
|
||||
if not isinstance(checkrun_node, dict):
|
||||
warn(f"Expected dictionary, but got {type(checkrun_node)}")
|
||||
continue
|
||||
checkrun_name = f"{get_check_run_name_prefix(workflow_run)}{checkrun_node['name']}"
|
||||
checkrun_name = f'{get_check_run_name_prefix(workflow_run)}{checkrun_node["name"]}'
|
||||
existing_checkrun = workflow_obj.jobs.get(checkrun_name)
|
||||
if existing_checkrun is None or not is_passing_status(
|
||||
existing_checkrun.status
|
||||
@ -819,9 +819,10 @@ class GitHubPR:
|
||||
cursor=info["reviews"]["pageInfo"]["startCursor"],
|
||||
)
|
||||
info = rc["data"]["repository"]["pullRequest"]
|
||||
reviews = {
|
||||
author: state for author, state in self._reviews if state != "COMMENTED"
|
||||
}
|
||||
reviews = {}
|
||||
for author, state in self._reviews:
|
||||
if state != "COMMENTED":
|
||||
reviews[author] = state
|
||||
return list(reviews.items())
|
||||
|
||||
def get_approved_by(self) -> list[str]:
|
||||
@ -1223,17 +1224,9 @@ class GitHubPR:
|
||||
if not self.is_ghstack_pr():
|
||||
msg = self.gen_commit_message()
|
||||
pr_branch_name = f"__pull-request-{self.pr_num}__init__"
|
||||
repo.fetch(self.last_commit()["oid"], pr_branch_name)
|
||||
repo.fetch(f"pull/{self.pr_num}/head", pr_branch_name)
|
||||
repo._run_git("merge", "--squash", pr_branch_name)
|
||||
repo._run_git("commit", f'--author="{self.get_author()}"', "-m", msg)
|
||||
|
||||
# Did the PR change since we started the merge?
|
||||
pulled_sha = repo.show_ref(pr_branch_name)
|
||||
latest_pr_status = GitHubPR(self.org, self.project, self.pr_num)
|
||||
if pulled_sha != latest_pr_status.last_commit()["oid"]:
|
||||
raise RuntimeError(
|
||||
"PR has been updated since CI checks last passed. Please rerun the merge command."
|
||||
)
|
||||
return []
|
||||
else:
|
||||
return self.merge_ghstack_into(
|
||||
@ -1514,36 +1507,6 @@ def checks_to_markdown_bullets(
|
||||
]
|
||||
|
||||
|
||||
def post_starting_merge_comment(
|
||||
repo: GitRepo,
|
||||
pr: GitHubPR,
|
||||
explainer: TryMergeExplainer,
|
||||
dry_run: bool,
|
||||
ignore_current_checks_info: Optional[
|
||||
list[tuple[str, Optional[str], Optional[int]]]
|
||||
] = None,
|
||||
) -> None:
|
||||
"""Post the initial merge starting message on the PR. Also post a short
|
||||
message on all PRs in the stack."""
|
||||
gh_post_pr_comment(
|
||||
pr.org,
|
||||
pr.project,
|
||||
pr.pr_num,
|
||||
explainer.get_merge_message(ignore_current_checks_info),
|
||||
dry_run=dry_run,
|
||||
)
|
||||
if pr.is_ghstack_pr():
|
||||
for additional_prs, _ in get_ghstack_prs(repo, pr):
|
||||
if additional_prs.pr_num != pr.pr_num:
|
||||
gh_post_pr_comment(
|
||||
additional_prs.org,
|
||||
additional_prs.project,
|
||||
additional_prs.pr_num,
|
||||
f"Starting merge as part of PR stack under #{pr.pr_num}",
|
||||
dry_run=dry_run,
|
||||
)
|
||||
|
||||
|
||||
def manually_close_merged_pr(
|
||||
pr: GitHubPR,
|
||||
additional_merged_prs: list[GitHubPR],
|
||||
@ -2167,7 +2130,13 @@ def merge(
|
||||
check_for_sev(pr.org, pr.project, skip_mandatory_checks)
|
||||
|
||||
if skip_mandatory_checks:
|
||||
post_starting_merge_comment(repo, pr, explainer, dry_run)
|
||||
gh_post_pr_comment(
|
||||
pr.org,
|
||||
pr.project,
|
||||
pr.pr_num,
|
||||
explainer.get_merge_message(),
|
||||
dry_run=dry_run,
|
||||
)
|
||||
return pr.merge_into(
|
||||
repo,
|
||||
dry_run=dry_run,
|
||||
@ -2190,12 +2159,12 @@ def merge(
|
||||
)
|
||||
ignore_current_checks_info = failing
|
||||
|
||||
post_starting_merge_comment(
|
||||
repo,
|
||||
pr,
|
||||
explainer,
|
||||
dry_run,
|
||||
ignore_current_checks_info=ignore_current_checks_info,
|
||||
gh_post_pr_comment(
|
||||
pr.org,
|
||||
pr.project,
|
||||
pr.pr_num,
|
||||
explainer.get_merge_message(ignore_current_checks_info),
|
||||
dry_run=dry_run,
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
@ -2281,8 +2250,7 @@ def merge(
|
||||
except MandatoryChecksMissingError as ex:
|
||||
last_exception = str(ex)
|
||||
print(
|
||||
f"Merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} failed due to: {ex}. Retrying in 5 min",
|
||||
flush=True,
|
||||
f"Merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} failed due to: {ex}. Retrying in 5 min"
|
||||
)
|
||||
time.sleep(5 * 60)
|
||||
# Finally report timeout back
|
||||
|
||||
2
.github/scripts/trymerge_explainer.py
vendored
2
.github/scripts/trymerge_explainer.py
vendored
@ -79,7 +79,7 @@ class TryMergeExplainer:
|
||||
(
|
||||
"<details><summary>Advanced Debugging</summary>",
|
||||
"Check the merge workflow status ",
|
||||
f'<a href="{os.getenv("GH_RUN_URL")}">here</a>',
|
||||
f"<a href=\"{os.getenv('GH_RUN_URL')}\">here</a>",
|
||||
"</details>",
|
||||
)
|
||||
)
|
||||
|
||||
17
.github/scripts/windows/build_triton.bat
vendored
17
.github/scripts/windows/build_triton.bat
vendored
@ -1,17 +0,0 @@
|
||||
@echo on
|
||||
|
||||
set PYTHON_PREFIX=%PY_VERS:.=%
|
||||
set PYTHON_PREFIX=py%PYTHON_PREFIX:;=;py%
|
||||
call .ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat
|
||||
:: Create a new conda environment
|
||||
if "%PY_VERS%" == "3.13t" (
|
||||
call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python-freethreading python=3.13
|
||||
) else (
|
||||
call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python=%PY_VERS%
|
||||
)
|
||||
call conda run -n %PYTHON_PREFIX% pip install wheel pybind11 certifi cython cmake setuptools==72.1.0 ninja
|
||||
|
||||
dir "%VC_INSTALL_PATH%"
|
||||
|
||||
call "%VC_INSTALL_PATH%\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
call conda run -n %PYTHON_PREFIX% python .github/scripts/build_triton_wheel.py --device=%BUILD_DEVICE% %RELEASE%
|
||||
35
.github/scripts/windows/install_vs2022.ps1
vendored
35
.github/scripts/windows/install_vs2022.ps1
vendored
@ -1,35 +0,0 @@
|
||||
#Requires -RunAsAdministrator
|
||||
|
||||
# Enable long paths on Windows
|
||||
Set-ItemProperty -Path "HKLM:\\SYSTEM\CurrentControlSet\Control\FileSystem" -Name "LongPathsEnabled" -Value 1
|
||||
|
||||
$VC_VERSION_major = [int] ${env:VC_VERSION}.split(".")[0]
|
||||
$VC_DOWNLOAD_LINK = "https://aka.ms/vs/$VC_VERSION_major/release/vs_BuildTools.exe"
|
||||
$VC_INSTALL_ARGS = @("--nocache","--quiet","--norestart","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
|
||||
"--add Microsoft.Component.MSBuild",
|
||||
"--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
|
||||
"--add Microsoft.VisualStudio.Component.TextTemplating",
|
||||
"--add Microsoft.VisualStudio.Component.VC.CoreBuildTools",
|
||||
"--add Microsoft.VisualStudio.Component.VC.CoreIde",
|
||||
"--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
|
||||
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
|
||||
"--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
|
||||
"--add Microsoft.VisualStudio.Component.Windows11SDK.22621")
|
||||
|
||||
|
||||
echo "Downloading Visual Studio installer from $VC_DOWNLOAD_LINK."
|
||||
curl.exe --retry 3 -kL $VC_DOWNLOAD_LINK --output vs_installer.exe
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
echo "Download of the VS ${env:VC_YEAR} Version ${env:VC_VERSION} installer failed"
|
||||
exit 1
|
||||
}
|
||||
$InstallationPath = ${env:VC_INSTALL_PATH}
|
||||
$VC_INSTALL_ARGS = "--installPath `"$InstallationPath`"" + " " + $VC_INSTALL_ARGS
|
||||
echo "Installing Visual Studio version ${env:VC_VERSION} in $InstallationPath."
|
||||
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VC_INSTALL_ARGS -NoNewWindow -Wait -PassThru
|
||||
Remove-Item -Path vs_installer.exe -Force
|
||||
$exitCode = $process.ExitCode
|
||||
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
|
||||
echo "VS ${env:VC_YEAR} installer exited with code $exitCode, which should be one of [0, 3010]."
|
||||
exit 1
|
||||
}
|
||||
1
.github/templates/common.yml.j2
vendored
1
.github/templates/common.yml.j2
vendored
@ -4,7 +4,6 @@
|
||||
{%- set download_artifact_action = "actions/download-artifact@v4.1.7" -%}
|
||||
|
||||
{%- set timeout_minutes = 240 -%}
|
||||
{%- set timeout_minutes_windows_binary = 300 -%}
|
||||
|
||||
{%- macro concurrency(build_environment) -%}
|
||||
concurrency:
|
||||
|
||||
@ -111,10 +111,7 @@ jobs:
|
||||
ALPINE_IMAGE: "docker.io/s390x/alpine"
|
||||
{%- elif config["gpu_arch_type"] == "rocm" %}
|
||||
runs_on: linux.rocm.gpu
|
||||
{%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] == "12.8" %}
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8 build needs sm_70+ runner
|
||||
{%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] != "12.8"%}
|
||||
{%- elif config["gpu_arch_type"] == "cuda" %}
|
||||
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
|
||||
runs_on: linux.4xlarge.nvidia.gpu
|
||||
{%- else %}
|
||||
|
||||
6
.github/templates/upload.yml.j2
vendored
6
.github/templates/upload.yml.j2
vendored
@ -25,6 +25,9 @@
|
||||
DOCKER_IMAGE: !{{ config["container_image"] }}
|
||||
{%- endif %}
|
||||
{%- if config["package_type"] == "manywheel" %}
|
||||
{%- if config["devtoolset"] %}
|
||||
DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
|
||||
{%- endif %}
|
||||
{%- if config.use_split_build is defined %}
|
||||
use_split_build: !{{ config["use_split_build"] }}
|
||||
{%- endif %}
|
||||
@ -34,6 +37,9 @@
|
||||
LIBTORCH_CONFIG: !{{ config["libtorch_config"] }}
|
||||
{%- endif %}
|
||||
LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
|
||||
{%- if config["devtoolset"] %}
|
||||
DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
|
||||
{%- endif %}
|
||||
{%- if is_windows %}
|
||||
# This is a dummy value for libtorch to work correctly with our batch scripts
|
||||
# without this value pip does not get installed for some reason
|
||||
|
||||
@ -1,197 +0,0 @@
|
||||
{% import 'common.yml.j2' as common %}
|
||||
{% import 'upload.yml.j2' as upload %}
|
||||
|
||||
{%- block name -%}
|
||||
# Template is at: .github/templates/windows_arm64_binary_build_workflow.yml.j2
|
||||
# Generation script: .github/scripts/generate_ci_workflows.py
|
||||
name: !{{ build_environment }}
|
||||
{%- endblock %}
|
||||
|
||||
{%- macro set_runner_specific_vars() -%}
|
||||
# NOTE: These environment variables are put here so that they can be applied on every job equally
|
||||
# They are also here because setting them at a workflow level doesn't give us access to the
|
||||
# runner.temp variable, which we need.
|
||||
- name: Populate binary env
|
||||
shell: cmd
|
||||
run: |
|
||||
echo BINARY_ENV_FILE=%RUNNER_TEMP%/env>> %GITHUB_ENV%
|
||||
echo PYTORCH_FINAL_PACKAGE_DIR=%RUNNER_TEMP%/artifacts>> %GITHUB_ENV%
|
||||
echo WIN_PACKAGE_WORK_DIR=%RUNNER_TEMP%>> %GITHUB_ENV%
|
||||
{%- endmacro %}
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- !{{ branches }}
|
||||
{%- if branches == "nightly" %}
|
||||
tags:
|
||||
# NOTE: Binary build pipelines should only get triggered on release candidate builds
|
||||
# Release candidate tags look like: v1.11.0-rc1
|
||||
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
|
||||
{%- endif %}
|
||||
{%- for label in ciflow_config.labels | sort %}
|
||||
{%- if loop.first and branches != "nightly" %}
|
||||
tags:
|
||||
{%- endif %}
|
||||
- '!{{ label }}/*'
|
||||
{%- endfor %}
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
BUILD_ENVIRONMENT: !{{ build_environment }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
SKIP_ALL_TESTS: 1
|
||||
PYTORCH_ROOT: /pytorch
|
||||
DOWNLOADS_DIR: c:\temp\downloads
|
||||
DEPENDENCIES_DIR: c:\temp\dependencies
|
||||
ENABLE_APL: 1
|
||||
ENABLE_OPENBLAS: 0
|
||||
MSVC_VERSION : 14.42
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
|
||||
jobs:
|
||||
get-label-type:
|
||||
if: github.repository_owner == 'pytorch'
|
||||
name: get-label-type
|
||||
uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
|
||||
with:
|
||||
triggering_actor: ${{ github.triggering_actor }}
|
||||
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
|
||||
curr_branch: ${{ github.head_ref || github.ref_name }}
|
||||
curr_ref_type: ${{ github.ref_type }}
|
||||
|
||||
{%- for config in build_configs %}
|
||||
!{{ config["build_name"] }}-build:
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs: get-label-type
|
||||
runs-on: "windows-11-arm64"
|
||||
timeout-minutes: !{{ common.timeout_minutes }}
|
||||
!{{ upload.binary_env(config, True) }}
|
||||
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
|
||||
{%- endif %}
|
||||
steps:
|
||||
!{{ set_runner_specific_vars() }}
|
||||
- name: Bootstrap folders
|
||||
shell: cmd
|
||||
run: |
|
||||
mkdir "%NIGHTLIES_PYTORCH_ROOT%"
|
||||
mkdir "%PYTORCH_FINAL_PACKAGE_DIR%"
|
||||
- name: Git checkout PyTorch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: "pytorch"
|
||||
- name: Bootstrap Build Tools
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
|
||||
- name: Bootstrap Git
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
|
||||
- name: Remove Pytorch folder
|
||||
shell: cmd
|
||||
run: |
|
||||
rmdir /s /q "pytorch"
|
||||
- name: Git checkout PyTorch - recursive
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: "pytorch"
|
||||
submodules: recursive
|
||||
- name: Bootstrap Python
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
|
||||
- name: Bootstrap APL
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
|
||||
- name: Bootstrap Rust
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
|
||||
- name: Bootstrap sccache
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_sccache.bat"
|
||||
- name: Bootstrap Libuv
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_libuv.bat"
|
||||
- name: Populate binary env
|
||||
shell: bash
|
||||
run: |
|
||||
"pytorch/.circleci/scripts/binary_populate_env.sh"
|
||||
- name: Build PyTorch binary
|
||||
shell: bash
|
||||
run: |
|
||||
"pytorch/.circleci/scripts/binary_windows_arm64_build.sh"
|
||||
- uses: !{{ common.upload_artifact_action }}
|
||||
if: always()
|
||||
with:
|
||||
name: !{{ config["build_name"] }}
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||
!{{ config["build_name"] }}-test: # Testing
|
||||
if: ${{ github.repository_owner == 'pytorch' }}
|
||||
needs:
|
||||
- !{{ config["build_name"] }}-build
|
||||
- get-label-type
|
||||
runs-on: "windows-11-arm64"
|
||||
timeout-minutes: !{{ common.timeout_minutes }}
|
||||
!{{ upload.binary_env(config, True) }}
|
||||
steps:
|
||||
!{{ set_runner_specific_vars() }}
|
||||
- uses: !{{ common.download_artifact_action }}
|
||||
name: Download Build Artifacts
|
||||
with:
|
||||
name: !{{ config["build_name"] }}
|
||||
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
|
||||
- name: Git checkout PyTorch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: "pytorch"
|
||||
- name: Bootstrap Git
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_git.bat"
|
||||
- name: Remove Pytorch folder
|
||||
shell: cmd
|
||||
run: |
|
||||
rmdir /s /q "pytorch"
|
||||
- name: Git checkout PyTorch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: "pytorch"
|
||||
submodules: recursive
|
||||
- name: Bootstrap APL
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_apl.bat"
|
||||
- name: Bootstrap Python
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_python.bat"
|
||||
- name: Bootstrap Build Tools
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_buildtools.bat"
|
||||
- name: Bootstrap Rust
|
||||
shell: cmd
|
||||
run: |
|
||||
"pytorch/.ci/pytorch/windows/arm64/bootstrap_rust.bat"
|
||||
- name: Populate binary env
|
||||
shell: bash
|
||||
run: |
|
||||
"pytorch/.circleci/scripts/binary_populate_env.sh"
|
||||
- name: Test PyTorch binary
|
||||
shell: bash
|
||||
run: |
|
||||
"pytorch/.circleci/scripts/binary_windows_arm64_test.sh"
|
||||
{%- if branches == "nightly" %}
|
||||
!{{ upload.upload_binaries(config, True) }}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
@ -71,7 +71,7 @@ jobs:
|
||||
{%- else %}
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
|
||||
{%- endif %}
|
||||
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
|
||||
timeout-minutes: !{{ common.timeout_minutes }}
|
||||
!{{ upload.binary_env(config, True) }}
|
||||
{%- if config.pytorch_extra_install_requirements is defined and config.pytorch_extra_install_requirements|d('')|length > 0 %}
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: !{{ config.pytorch_extra_install_requirements }}
|
||||
@ -107,14 +107,10 @@ jobs:
|
||||
{%- else %}
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge.nonephemeral"
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{%- if branches == "nightly" %}
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge"
|
||||
{%- else %}
|
||||
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
timeout-minutes: !{{ common.timeout_minutes_windows_binary }}
|
||||
timeout-minutes: !{{ common.timeout_minutes }}
|
||||
!{{ upload.binary_env(config, True) }}
|
||||
steps:
|
||||
!{{ common.setup_ec2_windows() }}
|
||||
|
||||
9
.github/workflows/_binary-build-linux.yml
vendored
9
.github/workflows/_binary-build-linux.yml
vendored
@ -18,7 +18,7 @@ on:
|
||||
description: prefix for runner label
|
||||
runs_on:
|
||||
required: false
|
||||
default: linux.12xlarge.memory.ephemeral
|
||||
default: linux.12xlarge.ephemeral
|
||||
type: string
|
||||
description: Hardware to run this "build" job on, linux.12xlarge or linux.arm64.2xlarge.
|
||||
timeout-minutes:
|
||||
@ -70,6 +70,10 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
description: Desired libtorch variant (for libtorch builds only)
|
||||
DESIRED_DEVTOOLSET:
|
||||
required: false
|
||||
type: string
|
||||
description: Desired dev toolset
|
||||
DESIRED_PYTHON:
|
||||
required: false
|
||||
type: string
|
||||
@ -100,6 +104,7 @@ jobs:
|
||||
SKIP_ALL_TESTS: 1
|
||||
LIBTORCH_CONFIG: ${{ inputs.LIBTORCH_CONFIG }}
|
||||
LIBTORCH_VARIANT: ${{ inputs.LIBTORCH_VARIANT }}
|
||||
DESIRED_DEVTOOLSET: ${{ inputs.DESIRED_DEVTOOLSET }}
|
||||
DESIRED_PYTHON: ${{ inputs.DESIRED_PYTHON }}
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: ${{ inputs.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}
|
||||
ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
|
||||
@ -125,6 +130,7 @@ jobs:
|
||||
echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
|
||||
echo "LIBTORCH_CONFIG=${{ env.LIBTORCH_CONFIG }}"
|
||||
echo "LIBTORCH_VARIANT=${{ env.LIBTORCH_VARIANT }}"
|
||||
echo "DESIRED_DEVTOOLSET=${{ env.DESIRED_DEVTOOLSET }}"
|
||||
echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
|
||||
echo "PYTORCH_EXTRA_INSTALL_REQUIREMENTS=${{ env.PYTORCH_EXTRA_INSTALL_REQUIREMENTS }}"
|
||||
echo "ALPINE_IMAGE=${{ env.ALPINE_IMAGE }}"
|
||||
@ -218,6 +224,7 @@ jobs:
|
||||
-e BINARY_ENV_FILE \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e DESIRED_CUDA \
|
||||
-e DESIRED_DEVTOOLSET \
|
||||
-e DESIRED_PYTHON \
|
||||
-e GITHUB_ACTIONS \
|
||||
-e GPU_ARCH_TYPE \
|
||||
|
||||
6
.github/workflows/_binary-test-linux.yml
vendored
6
.github/workflows/_binary-test-linux.yml
vendored
@ -47,6 +47,10 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
description: Desired libtorch variant (for libtorch builds only)
|
||||
DESIRED_DEVTOOLSET:
|
||||
required: false
|
||||
type: string
|
||||
description: Desired dev toolset
|
||||
DESIRED_PYTHON:
|
||||
required: false
|
||||
type: string
|
||||
@ -88,6 +92,7 @@ jobs:
|
||||
SKIP_ALL_TESTS: 1
|
||||
LIBTORCH_CONFIG: ${{ inputs.LIBTORCH_CONFIG }}
|
||||
LIBTORCH_VARIANT: ${{ inputs.LIBTORCH_VARIANT }}
|
||||
DESIRED_DEVTOOLSET: ${{ inputs.DESIRED_DEVTOOLSET }}
|
||||
DESIRED_PYTHON: ${{ inputs.DESIRED_PYTHON }}
|
||||
ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
|
||||
AWS_DEFAULT_REGION: us-east-1
|
||||
@ -113,6 +118,7 @@ jobs:
|
||||
echo "SKIP_ALL_TESTS=${{ env.SKIP_ALL_TESTS }}"
|
||||
echo "LIBTORCH_CONFIG=${{ env.LIBTORCH_CONFIG }}"
|
||||
echo "LIBTORCH_VARIANT=${{ env.LIBTORCH_VARIANT }}"
|
||||
echo "DESIRED_DEVTOOLSET=${{ env.DESIRED_DEVTOOLSET }}"
|
||||
echo "DESIRED_PYTHON=${{ env.DESIRED_PYTHON }}"
|
||||
|
||||
echo "ALPINE_IMAGE=${{ env.ALPINE_IMAGE }}"
|
||||
|
||||
6
.github/workflows/_binary-upload.yml
vendored
6
.github/workflows/_binary-upload.yml
vendored
@ -43,6 +43,10 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
description: Desired libtorch variant (for libtorch builds only)
|
||||
DESIRED_DEVTOOLSET:
|
||||
required: false
|
||||
type: string
|
||||
description: Desired dev toolset
|
||||
DESIRED_PYTHON:
|
||||
required: false
|
||||
type: string
|
||||
@ -62,6 +66,7 @@ on:
|
||||
jobs:
|
||||
upload:
|
||||
runs-on: ubuntu-22.04
|
||||
environment: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v'))) && 'conda-aws-upload' || '' }}
|
||||
container:
|
||||
image: continuumio/miniconda3:4.12.0
|
||||
env:
|
||||
@ -76,6 +81,7 @@ jobs:
|
||||
SKIP_ALL_TESTS: 1
|
||||
LIBTORCH_CONFIG: ${{ inputs.LIBTORCH_CONFIG }}
|
||||
LIBTORCH_VARIANT: ${{ inputs.LIBTORCH_VARIANT }}
|
||||
DESIRED_DEVTOOLSET: ${{ inputs.DESIRED_DEVTOOLSET }}
|
||||
DESIRED_PYTHON: ${{ inputs.DESIRED_PYTHON }}
|
||||
BINARY_ENV_FILE: /tmp/env
|
||||
GITHUB_TOKEN: ${{ secrets.github-token }}
|
||||
|
||||
42
.github/workflows/_linux-build.yml
vendored
42
.github/workflows/_linux-build.yml
vendored
@ -69,11 +69,13 @@ on:
|
||||
required: false
|
||||
type: string
|
||||
default: ""
|
||||
max-jobs:
|
||||
use_split_build:
|
||||
description: |
|
||||
Overwrite the number of jobs to use for the build
|
||||
[Experimental] Build a libtorch only wheel and build pytorch such that
|
||||
are built from the libtorch wheel.
|
||||
required: false
|
||||
type: string
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
secrets:
|
||||
HUGGING_FACE_HUB_TOKEN:
|
||||
@ -208,7 +210,7 @@ jobs:
|
||||
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
|
||||
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
||||
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
|
||||
MAX_JOBS_OVERRIDE: ${{ inputs.max-jobs }}
|
||||
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
|
||||
run: |
|
||||
START_TIME=$(date +%s)
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
|
||||
@ -228,12 +230,6 @@ jobs:
|
||||
DOCKER_SHELL_CMD=
|
||||
fi
|
||||
|
||||
if [[ ${MAX_JOBS_OVERRIDE} == "" ]]; then
|
||||
MAX_JOBS="$(nproc --ignore=2)"
|
||||
else
|
||||
MAX_JOBS="${MAX_JOBS_OVERRIDE}"
|
||||
fi
|
||||
|
||||
# Leaving 1GB for the runner and other things
|
||||
TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
|
||||
# https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
|
||||
@ -245,8 +241,7 @@ jobs:
|
||||
# shellcheck disable=SC2086
|
||||
container_name=$(docker run \
|
||||
-e BUILD_ENVIRONMENT \
|
||||
-e MAX_JOBS=${MAX_JOBS} \
|
||||
-e MAX_JOBS_OVERRIDE \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e AWS_DEFAULT_REGION \
|
||||
-e PR_NUMBER \
|
||||
-e SHA1 \
|
||||
@ -287,7 +282,7 @@ jobs:
|
||||
|
||||
- name: Store PyTorch Build Artifacts on S3
|
||||
uses: seemethere/upload-artifact-s3@v5
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
with:
|
||||
name: ${{ inputs.build-environment }}
|
||||
retention-days: 14
|
||||
@ -295,15 +290,34 @@ jobs:
|
||||
path: artifacts.zip
|
||||
s3-bucket: ${{ inputs.s3-bucket }}
|
||||
|
||||
- name: Store PyTorch Build Artifacts on S3 for split build
|
||||
uses: seemethere/upload-artifact-s3@v5
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
with:
|
||||
name: ${{ inputs.build-environment }}-experimental-split-build
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path: artifacts.zip
|
||||
s3-bucket: ${{ inputs.s3-bucket }}
|
||||
|
||||
- name: Store PyTorch Build Artifacts for s390x
|
||||
uses: actions/upload-artifact@v4
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.build-environment == 'linux-s390x-binary-manywheel'
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
|
||||
with:
|
||||
name: ${{ inputs.build-environment }}
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path: artifacts.zip
|
||||
|
||||
- name: Store PyTorch Build Artifacts for s390x for split build
|
||||
uses: actions/upload-artifact@v4
|
||||
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
|
||||
with:
|
||||
name: ${{ inputs.build-environment }}-experimental-split-build
|
||||
retention-days: 14
|
||||
if-no-files-found: error
|
||||
path: artifacts.zip
|
||||
|
||||
- name: Upload sccache stats
|
||||
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
|
||||
uses: ./.github/actions/upload-sccache-stats
|
||||
|
||||
15
.github/workflows/_mac-build.yml
vendored
15
.github/workflows/_mac-build.yml
vendored
@ -33,6 +33,10 @@ on:
|
||||
default: "3.9"
|
||||
description: |
|
||||
The python version to be used. Will be 3.9 by default
|
||||
environment-file:
|
||||
required: false
|
||||
type: string
|
||||
description: Set the conda environment file used to setup macOS build.
|
||||
test-matrix:
|
||||
required: false
|
||||
type: string
|
||||
@ -82,12 +86,23 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Setup miniconda
|
||||
if: inputs.environment-file == ''
|
||||
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
|
||||
pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt
|
||||
|
||||
# This option is used when cross-compiling arm64 from x86-64. Specifically, we need arm64 conda
|
||||
# environment even though the arch is x86-64
|
||||
- name: Setup miniconda using the provided environment file
|
||||
if: inputs.environment-file != ''
|
||||
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
environment-file: ${{ inputs.environment-file }}
|
||||
pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt
|
||||
|
||||
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
|
||||
uses: nick-fields/retry@v3.0.0
|
||||
if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
|
||||
|
||||
5
.github/workflows/_rocm-test.yml
vendored
5
.github/workflows/_rocm-test.yml
vendored
@ -251,11 +251,6 @@ jobs:
|
||||
# copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
|
||||
docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
|
||||
|
||||
- name: Change permissions (only needed for MI300 runners for now)
|
||||
if: ${{ always() && steps.test.conclusion && contains(matrix.runner, 'mi300') }}
|
||||
run: |
|
||||
docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "sudo chown -R 1001:1001 test"
|
||||
|
||||
- name: Print remaining test logs
|
||||
shell: bash
|
||||
if: always() && steps.test.conclusion
|
||||
|
||||
@ -62,12 +62,7 @@ jobs:
|
||||
if: cancelled()
|
||||
shell: bash
|
||||
run: |
|
||||
# If podman build command is interrupted,
|
||||
# if podman build command is interrupted,
|
||||
# it can leave a couple of processes still running.
|
||||
# Order them to stop for clean shutdown.
|
||||
# It looks like sometimes some processes remain
|
||||
# after first cleanup.
|
||||
# Wait a bit and do cleanup again. It looks like it helps.
|
||||
docker system prune --build -f || true
|
||||
sleep 60
|
||||
# order them to stop for clean shutdown.
|
||||
docker system prune --build -f || true
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user