mirror of https://github.com/pytorch/pytorch.git
synced 2025-11-04 16:04:58 +08:00

Update (base update)
[ghstack-poisoned]
@@ -21,6 +21,3 @@
-  cxx = /usr/bin/clang++
-  cxxpp = /usr/bin/clang++
-  ld = /usr/bin/clang++
 
 [project]
   default_flavors_mode=all
@@ -291,7 +291,7 @@ case "$image" in
     PROTOBUF=yes
     DB=yes
     VISION=yes
-    ROCM_VERSION=6.0
+    ROCM_VERSION=6.1
     NINJA_VERSION=1.9.0
     CONDA_CMAKE=yes
     TRITON=yes
@@ -302,7 +302,7 @@ case "$image" in
     PROTOBUF=yes
     DB=yes
     VISION=yes
-    ROCM_VERSION=6.1
+    ROCM_VERSION=6.2
     NINJA_VERSION=1.9.0
     CONDA_CMAKE=yes
     TRITON=yes
@@ -355,6 +355,12 @@ case "$image" in
     CONDA_CMAKE=yes
     VISION=yes
     ;;
+  pytorch-linux-jammy-py3-clang18-asan)
+    ANACONDA_PYTHON_VERSION=3.10
+    CLANG_VERSION=18
+    CONDA_CMAKE=yes
+    VISION=yes
+    ;;
   pytorch-linux-jammy-py3.9-gcc11)
     ANACONDA_PYTHON_VERSION=3.9
     GCC_VERSION=11
@@ -379,6 +385,14 @@ case "$image" in
     GCC_VERSION=11
     CONDA_CMAKE=yes
     HALIDE=yes
+    TRITON=yes
+    ;;
+  pytorch-linux-jammy-py3.12-triton-cpu)
+    CUDA_VERSION=12.4
+    ANACONDA_PYTHON_VERSION=3.12
+    GCC_VERSION=11
+    CONDA_CMAKE=yes
+    TRITON_CPU=yes
     ;;
   pytorch-linux-focal-linter)
     # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
@@ -509,6 +523,7 @@ docker build \
        --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
        --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
        --build-arg "TRITON=${TRITON}" \
+       --build-arg "TRITON_CPU=${TRITON_CPU}" \
        --build-arg "ONNX=${ONNX}" \
        --build-arg "DOCS=${DOCS}" \
        --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \

.ci/docker/ci_commit_pins/triton-cpu.txt (new file, 1 line)
@@ -0,0 +1 @@
+6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f

@@ -1 +1 @@
-5fe38ffd73c2ac6ed6323b554205186696631c6f
+cf34004b8a67d290a962da166f5aa2fc66751326
@@ -13,11 +13,17 @@ if [ -n "$CLANG_VERSION" ]; then
   elif [[ $UBUNTU_VERSION == 22.04 ]]; then
     # work around ubuntu apt-get conflicts
     sudo apt-get -y -f install
     wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add  -
+    if [[ $CLANG_VERSION == 18 ]]; then
+      apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
+    fi
   fi
 
   sudo apt-get update
-  apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
-  apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
+  apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
+  if [[ $CLANG_VERSION == 18 ]]; then
+    apt-get install -y --no-install-recommends libomp-18-dev
+  fi
 
   # Install dev version of LLVM.
   if [ -n "$LLVMDEV" ]; then
@@ -105,7 +105,7 @@ function install_121 {
 }
 
 function install_124 {
-  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
+  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
   rm -rf /usr/local/cuda-12.4 /usr/local/cuda
   # install CUDA 12.4.1 in the same container
   wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run
@@ -5,19 +5,19 @@ set -ex
 
 NCCL_VERSION=v2.21.5-1
 
-function install_cusparselt_052 {
+function install_cusparselt_062 {
     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
     mkdir tmp_cusparselt && pushd tmp_cusparselt
-    wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
-    tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
-    cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/
-    cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
+    tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
+    cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/
+    cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
     popd
     rm -rf tmp_cusparselt
 }
 
 function install_124 {
-  echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
+  echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
   rm -rf /usr/local/cuda-12.4 /usr/local/cuda
   # install CUDA 12.4.1 in the same container
   wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
@@ -44,7 +44,7 @@ function install_124 {
   cd ..
   rm -rf nccl
 
-  install_cusparselt_052
+  install_cusparselt_062
 
   ldconfig
 }
@@ -5,7 +5,7 @@ set -ex
 # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
 mkdir tmp_cusparselt && cd tmp_cusparselt
 
-if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-4]$ ]]; then
+if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
     arch_path='sbsa'
     export TARGETARCH=${TARGETARCH:-$(uname -m)}
    if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
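
For context, the widened gate above is a plain Bash substring-plus-regex test; a minimal standalone sketch (the version value is illustrative):

```bash
# ${CUDA_VERSION:0:4} keeps the first four characters ("12.6" from "12.6.0"),
# and the character class now accepts 12.2 through 12.6 instead of stopping at 12.4.
CUDA_VERSION="12.6.0"   # illustrative value
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
  echo "cuSPARSELt install path taken for CUDA ${CUDA_VERSION:0:4}"
fi
```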
@@ -10,6 +10,21 @@ if [[ -z $ROCM_VERSION ]]; then
     exit 1;
 fi
 
+IS_UBUNTU=0
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+  ubuntu)
+    IS_UBUNTU=1
+    ;;
+  centos)
+    IS_UBUNTU=0
+    ;;
+  *)
+    echo "Unable to determine OS..."
+    exit 1
+    ;;
+esac
+
 # To make version comparison easier, create an integer representation.
 save_IFS="$IFS"
 IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION})
@@ -57,9 +72,11 @@ MIOPEN_CMAKE_COMMON_FLAGS="
 -DMIOPEN_BUILD_DRIVER=OFF
 "
 # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
-if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
-    echo "ROCm 6.2 MIOpen does not need any patches, do not build from source"
+if [[ $ROCM_INT -ge 60300 ]]; then
+    echo "ROCm 6.3+ MIOpen does not need any patches, do not build from source"
     exit 0
+elif [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
+    MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
 elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
     echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
     exit 0
@@ -93,12 +110,21 @@ else
     exit 1
 fi
 
-yum remove -y miopen-hip
+
+if [[ ${IS_UBUNTU} == 1 ]]; then
+  apt-get remove -y miopen-hip
+else
+  yum remove -y miopen-hip
+fi
 
 git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
 pushd MIOpen
 # remove .git to save disk space since CI runner was running out
 rm -rf .git
+# Don't build CK to save docker build time
+if [[ $ROCM_INT -ge 60200 ]]; then
+    sed -i '/composable_kernel/d' requirements.txt
+fi
 # Don't build MLIR to save docker build time
 # since we are disabling MLIR backend for MIOpen anyway
 if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
@@ -111,10 +137,15 @@ cmake -P install_deps.cmake --minimum
 
 # clean up since CI runner was running out of disk space
 rm -rf /tmp/*
-yum clean all
-rm -rf /var/cache/yum
-rm -rf /var/lib/yum/yumdb
-rm -rf /var/lib/yum/history
+if [[ ${IS_UBUNTU} == 1 ]]; then
+  apt-get autoclean && apt-get clean
+  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+else
+  yum clean all
+  rm -rf /var/cache/yum
+  rm -rf /var/lib/yum/yumdb
+  rm -rf /var/lib/yum/history
+fi
 
 ## Build MIOpen
 mkdir -p build
@@ -131,7 +162,11 @@ make -j $(nproc) package
 # clean up since CI runner was running out of disk space
 rm -rf /usr/local/cget
 
-yum install -y miopen-*.rpm
+if [[ ${IS_UBUNTU} == 1 ]]; then
+  sudo dpkg -i miopen-hip*.deb
+else
+  yum install -y miopen-*.rpm
+fi
 
 popd
 rm -rf MIOpen
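
A note on the `$ROCM_INT` tests that drive this script's branching: the version string is split on dots and packed into a single integer, so range checks become plain arithmetic comparisons. A minimal sketch of the idea, not the script's exact code (it uses `save_IFS`/`ROCM_VERSION_ARRAY`):

```bash
ROCM_VERSION="6.2.1"   # illustrative input
IFS=. read -r major minor patch <<< "$ROCM_VERSION"
ROCM_INT=$((major * 10000 + minor * 100 + ${patch:-0}))
echo "$ROCM_INT"       # prints 60201
# Range checks like the ones above then reduce to integer comparisons:
if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
  echo "ROCm 6.2.x"
fi
```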
@@ -32,7 +32,7 @@ pip_install coloredlogs packaging
 
 pip_install onnxruntime==1.18.1
 pip_install onnx==1.16.2
-pip_install onnxscript==0.1.0.dev20240831 --no-deps
+pip_install onnxscript==0.1.0.dev20241008 --no-deps
 # required by onnxscript
 pip_install ml_dtypes
 
@@ -15,8 +15,11 @@ conda_reinstall() {
 if [ -n "${XPU_VERSION}" ]; then
   TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
   TRITON_TEXT_FILE="triton-xpu"
+elif [ -n "${TRITON_CPU}" ]; then
+  TRITON_REPO="https://github.com/triton-lang/triton-cpu"
+  TRITON_TEXT_FILE="triton-cpu"
 else
-  TRITON_REPO="https://github.com/openai/triton"
+  TRITON_REPO="https://github.com/triton-lang/triton"
   TRITON_TEXT_FILE="triton"
 fi
 
@@ -44,9 +47,10 @@ chown -R jenkins /var/lib/jenkins/triton
 chgrp -R jenkins /var/lib/jenkins/triton
 pushd /var/lib/jenkins/
 
-as_jenkins git clone ${TRITON_REPO} triton
+as_jenkins git clone --recursive ${TRITON_REPO} triton
 cd triton
 as_jenkins git checkout ${TRITON_PINNED_COMMIT}
+as_jenkins git submodule update --init --recursive
 cd python
 
 # TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
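
One detail worth noting in the change above: `git clone --recursive` initializes submodules at the clone's default branch, so after checking out the pinned commit the submodules are re-synced explicitly. A minimal sketch of the sequence (repo URL from the diff; the commit variable stands in for the ci_commit_pins value):

```bash
git clone --recursive https://github.com/triton-lang/triton triton
cd triton
git checkout "$TRITON_PINNED_COMMIT"      # pin read from the ci_commit_pins *.txt file
git submodule update --init --recursive   # realign submodules with the pinned commit
```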
@@ -37,6 +37,12 @@ esac
 
 (
   set -x
+  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
+  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
+  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
+  sudo systemctl daemon-reload
+  sudo systemctl restart docker
+
   docker build \
     --target final \
     --progress plain \
@@ -10,6 +10,7 @@ ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US.UTF-8
 
+ARG DEVTOOLSET_VERSION=9
 
 # Note: This is required patch since CentOS have reached EOL
 # otherwise any yum install setp will fail
 RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
@@ -124,7 +124,14 @@ if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
 fi
 (
     set -x
-    DOCKER_BUILDKIT=1 docker build \
+
+    # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
+    # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
+    sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
+    sudo systemctl daemon-reload
+    sudo systemctl restart docker
+
+    DOCKER_BUILDKIT=1 docker build  \
         ${DOCKER_GPU_BUILD_ARG} \
         --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
         --target "${TARGET}" \
@@ -139,9 +139,9 @@ opt-einsum==3.3
 #Pinned versions: 3.3
 #test that import: test_linalg.py
 
-optree==0.12.1
+optree==0.13.0
 #Description: A library for tree manipulation
-#Pinned versions: 0.12.1
+#Pinned versions: 0.13.0
 #test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
 #test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
 #common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
@@ -68,6 +68,8 @@ RUN rm install_rocm.sh
 COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh
 RUN rm install_rocm_magma.sh
+ADD ./common/install_miopen.sh install_miopen.sh
+RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
 ENV ROCM_PATH /opt/rocm
 ENV PATH /opt/rocm/bin:$PATH
 ENV PATH /opt/rocm/hcc/bin:$PATH
@@ -121,5 +123,8 @@ RUN bash ./install_cache.sh && rm install_cache.sh
 ARG BUILD_ENVIRONMENT
 ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 
+# Install LLVM dev version (Defined in the pytorch/builder github repository)
+COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
+
 USER jenkins
 CMD ["bash"]
@@ -147,6 +147,13 @@ COPY ci_commit_pins/triton.txt triton.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
 RUN rm install_triton.sh common_utils.sh triton.txt
 
+ARG TRITON_CPU
+COPY ./common/install_triton.sh install_triton.sh
+COPY ./common/common_utils.sh common_utils.sh
+COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
+RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
+RUN rm install_triton.sh common_utils.sh triton-cpu.txt
+
 ARG EXECUTORCH
 # Build and install executorch
 COPY ./common/install_executorch.sh install_executorch.sh
@@ -49,13 +49,8 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
 fi
 
 # Enable LLVM dependency for TensorExpr testing
-if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
-  export USE_LLVM=/opt/rocm/llvm
-  export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
-else
-  export USE_LLVM=/opt/llvm
-  export LLVM_DIR=/opt/llvm/lib/cmake/llvm
-fi
+export USE_LLVM=/opt/llvm
+export LLVM_DIR=/opt/llvm/lib/cmake/llvm
 
 if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then
   # To build test_edge_op_registration
@@ -183,7 +178,7 @@ fi
 # sccache will fail for CUDA builds if all cores are used for compiling
 # gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
 if [ -z "$MAX_JOBS" ]; then
-  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
+  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; } && which sccache > /dev/null; then
     export MAX_JOBS=$(($(nproc) - 1))
   fi
 fi
@@ -223,10 +218,6 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
     export USE_PRECOMPILED_HEADERS=1
 fi
 
-if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build*  ]]; then
-  export USE_GLOO_WITH_OPENSSL=ON
-fi
-
 if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
   export BUILD_STATIC_RUNTIME_BENCHMARK=ON
 fi
@@ -237,7 +228,7 @@ fi
 
 # Do not change workspace permissions for ROCm CI jobs
 # as it can leave workspace with bad permissions for cancelled jobs
-if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
   # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
   WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
   cleanup_workspace() {
@@ -283,6 +274,7 @@ else
     # set only when building other architectures
     # or building non-XLA tests.
     if [[ "$BUILD_ENVIRONMENT" != *rocm*  &&
+          "$BUILD_ENVIRONMENT" != *s390x*   &&
           "$BUILD_ENVIRONMENT" != *xla* ]]; then
       if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
         # Install numpy-2.0.2 for builds which are backward compatible with 1.X
@@ -345,11 +337,11 @@ else
     CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
     CUSTOM_OP_TEST="$PWD/test/custom_operator"
     python --version
-    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+    SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
 
     mkdir -p "$CUSTOM_OP_BUILD"
     pushd "$CUSTOM_OP_BUILD"
-    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
           -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
     make VERBOSE=1
     popd
@@ -359,10 +351,10 @@ else
     JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
     JIT_HOOK_TEST="$PWD/test/jit_hooks"
     python --version
-    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
+    SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
     mkdir -p "$JIT_HOOK_BUILD"
     pushd "$JIT_HOOK_BUILD"
-    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
     make VERBOSE=1
     popd
@@ -374,7 +366,7 @@ else
     python --version
     mkdir -p "$CUSTOM_BACKEND_BUILD"
     pushd "$CUSTOM_BACKEND_BUILD"
-    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
     make VERBOSE=1
     popd
@@ -407,6 +399,6 @@ fi
 
 # snadampal: skipping it till sccache support added for aarch64
 # https://github.com/pytorch/pytorch/issues/121559
-if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *aarch64* &&  "$BUILD_ENVIRONMENT" != *s390x* ]]; then
   print_sccache_stats
 fi
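
A note on the `SITE_PACKAGES` change above: `distutils` (and with it `distutils.sysconfig.get_python_lib`) was removed from the standard library in Python 3.12, so the script now derives a CMake-style `;`-separated prefix list from `site.getsitepackages()`. A quick way to see what the new command produces (output paths depend on the interpreter):

```bash
# Prints each site-packages directory plus its torch/ subdirectory, ";"-joined,
# which CMake accepts directly as a CMAKE_PREFIX_PATH list.
python -c 'import site; print(";".join(site.getsitepackages() + [p + "/torch" for p in site.getsitepackages()]))'
```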
@@ -191,9 +191,22 @@ function install_torchrec_and_fbgemm() {
   pip_uninstall torchrec-nightly
   pip_uninstall fbgemm-gpu-nightly
   pip_install setuptools-git-versioning scikit-build pyre-extensions
+
+  # TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it
+  # seems to be an sccache-related issue
+  if [[ "$IS_A100_RUNNER" == "1" ]]; then
+    unset CMAKE_CUDA_COMPILER_LAUNCHER
+    sudo mv /opt/cache/bin /opt/cache/bin-backup
+  fi
+
   # See https://github.com/pytorch/pytorch/issues/106971
   CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
   pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
+
+  if [[ "$IS_A100_RUNNER" == "1" ]]; then
+    export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
+    sudo mv /opt/cache/bin-backup /opt/cache/bin
+  fi
 }
 
 function clone_pytorch_xla() {
@@ -1,4 +1,4 @@
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from tempfile import mkdtemp
 
 from cryptography import x509
@@ -42,10 +42,10 @@ def create_cert(path, C, ST, L, O, key):
         .issuer_name(issuer)
         .public_key(key.public_key())
         .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.utcnow())
+        .not_valid_before(datetime.now(timezone.utc))
         .not_valid_after(
             # Our certificate will be valid for 10 days
-            datetime.utcnow()
+            datetime.now(timezone.utc)
             + timedelta(days=10)
         )
         .add_extension(
@@ -88,10 +88,10 @@ def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
         .issuer_name(ca_cert.subject)
         .public_key(csr_cert.public_key())
         .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.utcnow())
+        .not_valid_before(datetime.now(timezone.utc))
         .not_valid_after(
             # Our certificate will be valid for 10 days
-            datetime.utcnow()
+            datetime.now(timezone.utc)
             + timedelta(days=10)
             # Sign our certificate with our private key
         )
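
For reference, the substitution above replaces a naive timestamp with a timezone-aware one; `datetime.utcnow()` is deprecated as of Python 3.12. A quick check of the difference, run from the shell:

```bash
python - <<'EOF'
from datetime import datetime, timezone
print(datetime.utcnow().tzinfo)           # None: naive UTC, deprecated
print(datetime.now(timezone.utc).tzinfo)  # UTC: timezone-aware replacement
EOF
```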
@@ -375,9 +375,8 @@ test_inductor_cpp_wrapper_abi_compatible() {
   mkdir -p "$TEST_REPORTS_DIR"
 
   echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
-  # cpu stack allocation causes segfault and needs more investigation
-  PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
-  python test/run_test.py --include inductor/test_cuda_cpp_wrapper
+  python test/run_test.py --include inductor/test_cuda_cpp_wrapper inductor/test_cpu_repro inductor/test_extension_backend
 
   TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
     --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
@@ -404,7 +403,7 @@ pr_time_benchmarks() {
   PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
   echo "benchmark results on current PR: "
   cat  "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
 
   PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "$TEST_REPORTS_DIR/new_expected_results.csv"
 }
 
 if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
@@ -607,6 +606,11 @@ test_inductor_halide() {
   assert_git_not_dirty
 }
 
+test_inductor_triton_cpu() {
+  python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
+  assert_git_not_dirty
+}
+
 test_dynamo_benchmark() {
   # Usage: test_dynamo_benchmark huggingface 0
   TEST_REPORTS_DIR=$(pwd)/test/test-reports
@@ -661,15 +665,6 @@ test_inductor_torchbench_smoketest_perf() {
   # The threshold value needs to be actively maintained to make this check useful
   python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
 
-  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
-    --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
-  # The threshold value needs to be actively maintained to make this check useful
-  # The perf number of nanogpt seems not very stable, e.g.
-  # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
-  # and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
-  # we switch to use some other model.
-  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
-
   # Check memory compression ratio for a few models
   for test in hf_Albert timm_vision_transformer; do
     python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
@@ -713,6 +708,10 @@ test_inductor_set_cpu_affinity(){
     export KMP_BLOCKTIME=1
   fi
   cores=$(test_inductor_get_core_number)
+  # Set number of cores to 16 on Aarch64 for performance runs.
+  if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then
+    cores=16
+  fi
   export OMP_NUM_THREADS=$cores
   end_core=$((cores-1))
   export TASKSET="taskset -c 0-$end_core"
@@ -1402,7 +1401,7 @@ test_linux_aarch64() {
        inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \
        inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
        inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
-       inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes \
+       inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \
        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
 }
 
@@ -1436,6 +1435,8 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
   test_inductor_distributed
 elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
   test_inductor_halide
+elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
+  test_inductor_triton_cpu
 elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
   test_inductor_micro_benchmark
 elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
@@ -1459,7 +1460,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
   # https://github.com/opencv/opencv-python/issues/885
   pip_install opencv-python==4.8.0.74
   if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
-    checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
+    checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
     PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
   elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
     checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
@@ -26,7 +26,7 @@ fi
 export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
 
 set +ex
-grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
+grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h  --exclude=pythoncapi_compat.h --exclude=eval_frame.c torch/
 PYLONG_API_CHECK=$?
 if [[ $PYLONG_API_CHECK == 0 ]]; then
   echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
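
As background, this check relies on `grep` exiting 0 when it finds a match, so `PYLONG_API_CHECK == 0` means a banned call is present. A condensed sketch of the same pattern (the `--exclude` flags are omitted here for brevity):

```bash
# grep exits 0 on a match and 1 on no match; a hit fails the check.
if grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' torch/ > /dev/null; then
  echo "banned PyLong_{From,As}{Unsigned}Long usage found" >&2
  exit 1
fi
```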
@@ -27,12 +27,11 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
   source activate testenv >/dev/null
 elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
   python_path="/opt/python/cp\$python_nodot-cp\${python_nodot}"
-  # Prior to Python 3.8 paths were suffixed with an 'm'
-  if [[ -d  "\${python_path}/bin" ]]; then
-    export PATH="\${python_path}/bin:\$PATH"
-  elif [[ -d "\${python_path}m/bin" ]]; then
-    export PATH="\${python_path}m/bin:\$PATH"
+  if [[ "\$python_nodot" = *t ]]; then
+    python_digits="\$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
+    python_path="/opt/python/cp\$python_digits-cp\${python_digits}t"
   fi
+  export PATH="\${python_path}/bin:\$PATH"
 fi
 
 EXTRA_CONDA_FLAGS=""
@@ -44,7 +44,9 @@ ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
 DerivePointerAlignment: false
 DisableFormat:   false
-ForEachMacros:   [ FOR_EACH_RANGE, FOR_EACH, ]
+ForEachMacros:
+  - FOR_EACH_RANGE
+  - FOR_EACH
 IncludeCategories:
   - Regex:           '^<.*\.h(pp)?>'
     Priority:        1
@@ -58,6 +60,24 @@ IndentWrappedFunctionNames: false
 KeepEmptyLinesAtTheStartOfBlocks: false
 MacroBlockBegin: ''
 MacroBlockEnd:   ''
+Macros:
+  - >-
+    PyObject_HEAD_INIT(type)={
+        /* this is not exactly match with PyObject_HEAD_INIT in Python source code
+         * but it is enough for clang-format */
+        { 0xFFFFFFFF },
+        (type)
+    },
+  - >-
+    PyVarObject_HEAD_INIT(type, size)={
+        {
+            /* manually expand PyObject_HEAD_INIT(type) above
+             * because clang-format do not support recursive expansion */
+            { 0xFFFFFFFF },
+            (type)
+        },
+        (size)
+    },
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
 PenaltyBreakBeforeFirstCallParameter: 1
@@ -79,7 +99,11 @@ SpacesInContainerLiterals: true
 SpacesInCStyleCastParentheses: false
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
-Standard:        Cpp11
+Standard:        c++17
+StatementMacros:
+  - PyObject_HEAD
+  - PyObject_VAR_HEAD
+  - PyException_HEAD
 TabWidth:        8
 UseTab:          Never
 ---

.github/ISSUE_TEMPLATE.md (vendored, 38 deletions)
@@ -1,38 +0,0 @@
-If you have a question or would like help and support, please ask at our
-[forums](https://discuss.pytorch.org/).
-
-If you are submitting a feature request, please preface the title with [feature request].
-If you are submitting a bug report, please fill in the following details.
-
-## Issue description
-
-Provide a short description.
-
-## Code example
-
-Please try to provide a minimal example to repro the bug.
-Error messages and stack traces are also helpful.
-
-## System Info
-Please copy and paste the output from our
-[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py)
-(or fill out the checklist below manually).
-
-You can get the script and run it with:
-```
-wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py
-# For security purposes, please check the contents of collect_env.py before running it.
-python collect_env.py
-```
-
-- PyTorch or Caffe2:
-- How you installed PyTorch (conda, pip, source):
-- Build command you used (if compiling from source):
-- OS:
-- PyTorch version:
-- Python version:
-- CUDA/cuDNN version:
-- GPU models and configuration:
-- GCC version (if compiling from source):
-- CMake version:
-- Versions of any other relevant libraries:

.github/ISSUE_TEMPLATE/ci-sev.md (vendored, 3 changes)
@@ -5,7 +5,8 @@ about: Tracking incidents for PyTorch's CI infra.
 
 > NOTE: Remember to label this issue with "`ci: sev`"
 
-**MERGE BLOCKING** <!-- remove this line if you don't want this SEV to block merges -->
+ <!-- uncomment the below line if you don't want this SEV to block merges -->
+ <!--  **MERGE BLOCKING** -->
 
 ## Current Status
 *Status could be: preemptive, ongoing, mitigated, closed. Also tell people if they need to take action to fix it (i.e. rebase)*.

.github/actionlint.yaml (vendored, 24 deletions)
@@ -32,30 +32,6 @@ self-hosted-runner:
     - lf.linux.8xlarge.nvidia.gpu
     - lf.linux.16xlarge.nvidia.gpu
     - lf.linux.g5.4xlarge.nvidia.gpu
-    # Organization-wide AWS Linux Runners with new Amazon 2023 AMI
-    - amz2023.linux.large
-    - amz2023.linux.2xlarge
-    - amz2023.linux.4xlarge
-    - amz2023.linux.12xlarge
-    - amz2023.linux.24xlarge
-    - amz2023.linux.arm64.2xlarge
-    - amz2023.linux.arm64.m7g.4xlarge
-    - amz2023.linux.arm64.m7g.4xlarge.ephemeral
-    - amz2023.linux.4xlarge.nvidia.gpu
-    - amz2023.linux.8xlarge.nvidia.gpu
-    - amz2023.linux.16xlarge.nvidia.gpu
-    - amz2023.linux.g5.4xlarge.nvidia.gpu
-    # Pytorch/pytorch AWS Linux Runners with the new Amazon 2023 AMI on Linux Foundation account
-    - amz2023.lf.linux.large
-    - amz2023.lf.linux.2xlarge
-    - amz2023.lf.linux.4xlarge
-    - amz2023.lf.linux.12xlarge
-    - amz2023.lf.linux.24xlarge
-    - amz2023.lf.linux.arm64.2xlarge
-    - amz2023.lf.linux.4xlarge.nvidia.gpu
-    - amz2023.lf.linux.8xlarge.nvidia.gpu
-    - amz2023.lf.linux.16xlarge.nvidia.gpu
-    - amz2023.lf.linux.g5.4xlarge.nvidia.gpu
     # Repo-specific IBM hosted S390x runner
     - linux.s390x
     # Organization wide AWS Windows runners

.github/actions/checkout-pytorch/action.yml (vendored, 6 changes)
@@ -18,8 +18,14 @@ inputs:
 runs:
   using: composite
   steps:
+    - name: Check if in a container runner
+      shell: bash
+      id: check_container_runner
+      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
+
     - name: Clean workspace
       shell: bash
+      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
       env:
         NO_SUDO: ${{ inputs.no-sudo }}
       run: |
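
For context, the `IN_CONTAINER_RUNNER` probe these actions now share is a simple file-existence test; an expanded sketch of the one-liner:

```bash
# /.inarc marks an ARC runner and /.incontainer marks other container runners,
# so either file means the job is already running inside a container.
if [ -f /.inarc ] || [ -f /.incontainer ]; then
  IN_CONTAINER_RUNNER=true
else
  IN_CONTAINER_RUNNER=false
fi
echo "IN_CONTAINER_RUNNER=${IN_CONTAINER_RUNNER}" >> "$GITHUB_OUTPUT"
```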

.github/actions/linux-test/action.yml (vendored, 30 changes)
@@ -85,15 +85,25 @@ runs:
       with:
         docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
 
-    - name: Check if in a ARC runner
+    - name: Check if in a container runner
       shell: bash
-      id: check_arc_runner
-      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
+      id: check_container_runner
+      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
 
     - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
       id: install-nvidia-driver
       uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
+      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
+
+    - name: Setup GPU_FLAG for docker run
+      id: setup-gpu-flag
+      run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
+      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
+
+    - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
+      id: setup-sscache-port-flag
+      run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
+      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
 
     - name: Lock NVIDIA A100 40GB Frequency
       shell: bash
@@ -101,7 +111,7 @@ runs:
         sudo nvidia-smi -pm 1
         sudo nvidia-smi -ac 1215,1410
         nvidia-smi
-      if: contains(matrix.runner, 'a100')
+      if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
 
     - name: Start monitoring script
       id: monitor-script
@@ -172,6 +182,7 @@ runs:
         NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
         TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
         SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
+        SCCACHE_REGION: us-east-1
         SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
         SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
         DOCKER_IMAGE: ${{ inputs.docker-image }}
@@ -181,6 +192,9 @@ runs:
         PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
         HUGGING_FACE_HUB_TOKEN: ${{ inputs.HUGGING_FACE_HUB_TOKEN }}
+        SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+        IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
+
       shell: bash
       run: |
         set -x
@@ -199,6 +213,7 @@ runs:
         # shellcheck disable=SC2086,SC2090
         container_name=$(docker run \
           ${GPU_FLAG:-} \
+          ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
          -e BUILD_ENVIRONMENT \
           -e PR_NUMBER \
           -e GITHUB_ACTIONS \
@@ -227,6 +242,7 @@ runs:
           -e PR_LABELS \
           -e MAX_JOBS="$(nproc --ignore=2)" \
           -e SCCACHE_BUCKET \
+          -e SCCACHE_REGION \
           -e SCCACHE_S3_KEY_PREFIX \
           -e XLA_CUDA \
           -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@@ -234,7 +250,9 @@ runs:
           -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
           -e SKIP_SCCACHE_INITIALIZATION=1 \
           -e HUGGING_FACE_HUB_TOKEN \
+          -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
           -e DASHBOARD_TAG \
+          -e IS_A100_RUNNER \
           --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
           --security-opt seccomp=unconfined \
           --cap-add=SYS_PTRACE \
@@ -305,7 +323,7 @@ runs:
 
     - name: Teardown Linux
       uses: pytorch/test-infra/.github/actions/teardown-linux@main
-      if: always()
+      if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
 
     # NB: We are currently having an intermittent GPU-related issue on G5 runners with
     # A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does

.github/actions/setup-linux/action.yml (vendored, 12 changes)
@@ -28,14 +28,14 @@ runs:
         echo "instance-type: $(get_ec2_metadata instance-type)"
         echo "system info $(uname -a)"
 
-    - name: Check if in a ARC runner
+    - name: Check if in a container runner
       shell: bash
-      id: check_arc_runner
-      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)"  >> $GITHUB_OUTPUT
+      id: check_container_runner
+      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
 
     - name: Start docker if docker deamon is not running
       shell: bash
-      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
+      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
       run: |
         if systemctl is-active --quiet docker; then
             echo "Docker daemon is running...";
@@ -73,7 +73,7 @@ runs:
         env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
 
     - name: Kill any existing containers, clean up images
-      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
+      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
       shell: bash
       run: |
         # ignore expansion of "docker ps -q" since it could be empty
@@ -116,7 +116,7 @@ runs:
     - name: Check that the docker daemon is running
       shell: bash
       continue-on-error: true
-      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'true' }}
+      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
       run: |
         set +x

.github/ci_commit_pins/audio.txt (vendored, 2 changes)
@@ -1 +1 @@
-ba696ea3dfec4cbe693bf06a84c75dc196077f5b
+3f0569939c4369bec943fc27d1c9d8dfbc828c26

.github/lf-canary-scale-config.yml (vendored, 62 changes)
@@ -35,38 +35,35 @@ runner_types:
     is_ephemeral: false
     max_available: 1000
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
-    variants:
-      amz2023:
-        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.10xlarge.avx2:
     disk_size: 200
     instance_type: m4.10xlarge
     is_ephemeral: false
     max_available: 450
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.24xl.spr-metal:
     disk_size: 200
     instance_type: c7i.metal-24xl
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.16xlarge.spr:
     disk_size: 200
     instance_type: c7i.16xlarge
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.9xlarge.ephemeral:
     disk_size: 200
     instance_type: c5.9xlarge
     is_ephemeral: true
     max_available: 50
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
     variants:
       am2:
         ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
@@ -76,149 +73,140 @@ runner_types:
     is_ephemeral: true
     max_available: 300
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.16xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g3.16xlarge
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.24xlarge:
     disk_size: 150
     instance_type: c5.24xlarge
     is_ephemeral: false
     max_available: 500
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.24xlarge.ephemeral:
     disk_size: 150
     instance_type: c5.24xlarge
     is_ephemeral: true
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.2xlarge:
     disk_size: 150
     instance_type: c5.2xlarge
     is_ephemeral: false
     max_available: 3120
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.4xlarge:
     disk_size: 150
     instance_type: c5.4xlarge
     is_ephemeral: false
     max_available: 1000
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
-    variants:
-      amz2023:
-        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.4xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g3.4xlarge
     is_ephemeral: false
     max_available: 1000
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.8xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g3.8xlarge
     is_ephemeral: false
     max_available: 400
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
-    variants:
-      amz2023:
-        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.g4dn.12xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g4dn.12xlarge
     is_ephemeral: false
     max_available: 250
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.g4dn.metal.nvidia.gpu:
     disk_size: 150
     instance_type: g4dn.metal
     is_ephemeral: false
     max_available: 300
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.g5.48xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g5.48xlarge
     is_ephemeral: false
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.g5.12xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g5.12xlarge
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.g5.4xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g5.4xlarge
     is_ephemeral: false
     max_available: 2400
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.g6.4xlarge.experimental.nvidia.gpu:
     disk_size: 150
     instance_type: g6.4xlarge
     is_ephemeral: false
     max_available: 50
     os: linux
-    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
-    variants:
-      amz2023:
-        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
   lf.c.linux.large:
     max_available: 1200
     disk_size: 15
     instance_type: c5.large
 | 
			
		||||
    is_ephemeral: false
 | 
			
		||||
    os: linux
 | 
			
		||||
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
 | 
			
		||||
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
 | 
			
		||||
  lf.c.linux.arm64.2xlarge:
 | 
			
		||||
    disk_size: 256
 | 
			
		||||
    instance_type: t4g.2xlarge
 | 
			
		||||
    is_ephemeral: false
 | 
			
		||||
    max_available: 200
 | 
			
		||||
    os: linux
 | 
			
		||||
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
 | 
			
		||||
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
 | 
			
		||||
  lf.c.linux.arm64.m7g.4xlarge:
 | 
			
		||||
    disk_size: 256
 | 
			
		||||
    instance_type: m7g.4xlarge
 | 
			
		||||
    is_ephemeral: false
 | 
			
		||||
    max_available: 200
 | 
			
		||||
    os: linux
 | 
			
		||||
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
 | 
			
		||||
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
 | 
			
		||||
  lf.c.linux.arm64.2xlarge.ephemeral:
 | 
			
		||||
    disk_size: 256
 | 
			
		||||
    instance_type: t4g.2xlarge
 | 
			
		||||
    is_ephemeral: true
 | 
			
		||||
    max_available: 200
 | 
			
		||||
    os: linux
 | 
			
		||||
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
 | 
			
		||||
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
 | 
			
		||||
  lf.c.linux.arm64.m7g.4xlarge.ephemeral:
 | 
			
		||||
    disk_size: 256
 | 
			
		||||
    instance_type: m7g.4xlarge
 | 
			
		||||
    is_ephemeral: true
 | 
			
		||||
    max_available: 200
 | 
			
		||||
    os: linux
 | 
			
		||||
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
 | 
			
		||||
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
 | 
			
		||||
  lf.c.linux.arm64.m7g.metal:
 | 
			
		||||
    disk_size: 256
 | 
			
		||||
    instance_type: m7g.metal
 | 
			
		||||
    is_ephemeral: false
 | 
			
		||||
    max_available: 100
 | 
			
		||||
    os: linux
 | 
			
		||||
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
 | 
			
		||||
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
 | 
			
		||||
  lf.c.windows.g4dn.xlarge:
 | 
			
		||||
    disk_size: 256
 | 
			
		||||
    instance_type: g4dn.xlarge
 | 
			
		||||
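
The change repeated throughout this lf.c.-prefixed scale config, and again in .github/lf-scale-config.yml below, swaps every AMI pinned to the 2024-07-01 AL2023 release for the wildcard al2023-ami-2023.5.202* and drops the now-redundant pinned variant blocks (amz2023). A minimal sketch, assuming the scale-config consumer glob-matches AMI name patterns against concrete AMI names, showing that the wildcard still covers the previously pinned image:

# Illustrative only: assumes runner provisioning resolves the configured
# AMI entry by glob-matching it against concrete EC2 AMI names.
from fnmatch import fnmatch

pinned = "al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64"
pattern = "al2023-ami-2023.5.202*-kernel-6.1-x86_64"
assert fnmatch(pinned, pattern)  # later 2023.5.202x releases match as well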

.github/lf-scale-config.yml (vendored; 62 changes)

@@ -35,38 +35,35 @@ runner_types:
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    variants:
      amz2023:
        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.10xlarge.avx2:
    disk_size: 200
    instance_type: m4.10xlarge
    is_ephemeral: false
    max_available: 450
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.24xl.spr-metal:
    disk_size: 200
    instance_type: c7i.metal-24xl
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.16xlarge.spr:
    disk_size: 200
    instance_type: c7i.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.9xlarge.ephemeral:
    disk_size: 200
    instance_type: c5.9xlarge
    is_ephemeral: true
    max_available: 50
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
    variants:
      am2:
        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
@@ -76,149 +73,140 @@ runner_types:
    is_ephemeral: true
    max_available: 300
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.16xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.24xlarge:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: false
    max_available: 500
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.24xlarge.ephemeral:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.2xlarge:
    disk_size: 150
    instance_type: c5.2xlarge
    is_ephemeral: false
    max_available: 3120
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.4xlarge:
    disk_size: 150
    instance_type: c5.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    variants:
      amz2023:
        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.8xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.8xlarge
    is_ephemeral: false
    max_available: 400
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    variants:
      amz2023:
        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g4dn.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.12xlarge
    is_ephemeral: false
    max_available: 250
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g4dn.metal.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.metal
    is_ephemeral: false
    max_available: 300
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g5.48xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.48xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g5.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.12xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g5.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.4xlarge
    is_ephemeral: false
    max_available: 2400
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g6.4xlarge.experimental.nvidia.gpu:
    disk_size: 150
    instance_type: g6.4xlarge
    is_ephemeral: false
    max_available: 50
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    variants:
      amz2023:
        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.large:
    max_available: 1200
    disk_size: 15
    instance_type: c5.large
    is_ephemeral: false
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.arm64.2xlarge:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.m7g.4xlarge:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.2xlarge.ephemeral:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.m7g.4xlarge.ephemeral:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.m7g.metal:
    disk_size: 256
    instance_type: m7g.metal
    is_ephemeral: false
    max_available: 100
    os: linux
    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.windows.g4dn.xlarge:
    disk_size: 256
    instance_type: g4dn.xlarge

.github/merge_rules.yaml (vendored; 1 change)

@@ -544,6 +544,7 @@
  - anijain2305
  - bdhirsh
  - zou3519
  - isuruf
  mandatory_checks_name:
  - EasyCLA
  - Lint

.github/pytorch-probot.yml (vendored; 1 change)

@@ -16,6 +16,7 @@ ciflow_push_tags:
- ciflow/nightly
- ciflow/periodic
- ciflow/rocm
- ciflow/s390
- ciflow/slow
- ciflow/trunk
- ciflow/unstable

@@ -1,4 +1,4 @@
# iOS simulator requirements
coremltools==5.0b5
protobuf==3.20.2
optree==0.12.1
optree==0.13.0

@@ -27,7 +27,7 @@ pytest-cpp==2.3.0
rockset==1.0.3
z3-solver==4.12.2.0
tensorboard==2.13.0
optree==0.12.1
optree==0.13.0
# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
# which the stringify metadata is wrong when escaping double quote
protobuf==3.20.2

.github/scripts/generate_binary_build_matrix.py (vendored; 10 changes)

@@ -333,7 +333,7 @@ def generate_wheels_matrix(
        package_type = "manywheel"

    if python_versions is None:
        python_versions = FULL_PYTHON_VERSIONS + ["3.13"]
        python_versions = FULL_PYTHON_VERSIONS + ["3.13", "3.13t"]

    if arches is None:
        # Define default compute archivectures
@@ -369,7 +369,13 @@ def generate_wheels_matrix(
            # TODO: Enable python 3.13 on rocm, aarch64, windows
            if (
                gpu_arch_type == "rocm" or (os != "linux" and os != "linux-s390x")
            ) and python_version == "3.13":
            ) and (python_version == "3.13" or python_version == "3.13t"):
                continue

            # TODO: Enable python 3.13t on xpu and cpu-s390x
            if (
                gpu_arch_type == "xpu" or gpu_arch_type == "cpu-s390x"
            ) and python_version == "3.13t":
                continue

            if use_split_build and (
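
The two hunks above add 3.13t (the free-threaded CPython 3.13 build) to the default wheel matrix and then skip it wherever it is not yet supported. A self-contained sketch of that gating, with simplified stand-in names for the real matrix fields:

# Simplified stand-in for the matrix filter above; not the actual helper.
def is_excluded(gpu_arch_type: str, os_: str, python_version: str) -> bool:
    # 3.13 and 3.13t are not yet enabled on rocm, aarch64, or windows
    if (
        gpu_arch_type == "rocm" or os_ not in ("linux", "linux-s390x")
    ) and python_version in ("3.13", "3.13t"):
        return True
    # 3.13t is additionally skipped on xpu and cpu-s390x
    if gpu_arch_type in ("xpu", "cpu-s390x") and python_version == "3.13t":
        return True
    return False

assert is_excluded("rocm", "linux", "3.13t")
assert is_excluded("xpu", "linux", "3.13t")
assert not is_excluded("cuda", "linux", "3.13")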

.github/scripts/lintrunner.sh (vendored; 7 changes)

@@ -17,6 +17,11 @@ if [[ -d "${CACHE_DIRECTORY}" ]]; then
    cp -r "${CACHE_DIRECTORY}" . || true
fi

# if lintrunner is not installed, install it
if ! command -v lintrunner &> /dev/null; then
    python3 -m pip install lintrunner==0.12.5
fi

# This has already been cached in the docker image
lintrunner init 2> /dev/null

@@ -33,7 +38,7 @@ python3 torch/utils/data/datapipes/gen_pyi.py

RC=0
# Run lintrunner on all files
if ! lintrunner --force-color --all-files --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
if ! lintrunner --force-color --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
    echo ""
    echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
    echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"

.github/scripts/runner_determinator.py (vendored; 67 changes)

@@ -1,5 +1,9 @@
# flake8: noqa: G004

# Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
#       must be kept in sync. You can do it easily by running the following command:
#           python .github/scripts/update_runner_determinator.py

"""
This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default
@@ -79,6 +83,9 @@ class Experiment(NamedTuple):
    rollout_perc: float = (
        0  # Percentage of workflows to experiment on when user is not opted-in.
    )
    all_branches: bool = (
        False  # If True, the experiment is also enabled on the exception branches
    )

    # Add more fields as needed

@@ -212,7 +219,7 @@ def get_potential_pr_author(

def is_exception_branch(branch: str) -> bool:
    """
    Branches that get opted out of all experiments and should always use Meta runners
    Branches that get opted out of experiments by default, until they're explicitly enabled.
    """
    return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}

@@ -338,7 +345,10 @@ def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -


def get_runner_prefix(
    rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
    rollout_state: str,
    workflow_requestors: Iterable[str],
    branch: str,
    is_canary: bool = False,
) -> str:
    settings = parse_settings(rollout_state)
    user_optins = parse_users(rollout_state)
@@ -348,6 +358,12 @@ def get_runner_prefix(
    for experiment_name, experiment_settings in settings.experiments.items():
        enabled = False

        if not experiment_settings.all_branches and is_exception_branch(branch):
            log.info(
                f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
            )
            continue

        # Is any workflow_requestor opted in to this experiment?
        opted_in_users = [
            requestor
@@ -407,35 +423,34 @@ def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -
def main() -> None:
    args = parse_args()

    if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
        log.info(
            f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
    runner_label_prefix = DEFAULT_LABEL_PREFIX

    try:
        rollout_state = get_rollout_state_from_issue(
            args.github_token, args.github_issue_repo, args.github_issue
        )
        runner_label_prefix = DEFAULT_LABEL_PREFIX
    else:
        try:
            rollout_state = get_rollout_state_from_issue(
                args.github_token, args.github_issue_repo, args.github_issue
            )

            username = get_potential_pr_author(
                args.github_token,
                args.github_repo,
                args.github_actor,
                args.github_ref_type,
                args.github_branch,
            )
        username = get_potential_pr_author(
            args.github_token,
            args.github_repo,
            args.github_actor,
            args.github_ref_type,
            args.github_branch,
        )

            is_canary = args.github_repo == "pytorch/pytorch-canary"
        is_canary = args.github_repo == "pytorch/pytorch-canary"

            runner_label_prefix = get_runner_prefix(
                rollout_state, (args.github_issue_owner, username), is_canary
            )
        runner_label_prefix = get_runner_prefix(
            rollout_state,
            (args.github_issue_owner, username),
            args.github_branch,
            is_canary,
        )

        except Exception as e:
            log.error(
                f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
            )
    except Exception as e:
        log.error(
            f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
        )

    set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)

.github/scripts/test_runner_determinator.py (vendored; 87 changes)

@@ -4,6 +4,10 @@ from unittest.mock import Mock, patch
import runner_determinator as rd


USER_BRANCH = "somebranch"
EXCEPTION_BRANCH = "main"


class TestRunnerDeterminatorIssueParser(TestCase):
    def test_parse_settings(self) -> None:
        settings_text = """
@@ -66,6 +70,40 @@ class TestRunnerDeterminatorIssueParser(TestCase):
            "otherExp settings not parsed correctly",
        )

    def test_parse_all_branches_setting(self) -> None:
        settings_text = """
        ```
        experiments:
            lf:
                rollout_perc: 25
                all_branches: true
            otherExp:
                all_branches: True
                rollout_perc: 0
        ```

        ---

        Users:
        @User1,lf
        @User2,lf,otherExp

        """

        settings = rd.parse_settings(settings_text)

        self.assertTupleEqual(
            rd.Experiment(rollout_perc=25, all_branches=True),
            settings.experiments["lf"],
            "lf settings not parsed correctly",
        )
        self.assertTrue(settings.experiments["otherExp"].all_branches)
        self.assertTupleEqual(
            rd.Experiment(rollout_perc=0, all_branches=True),
            settings.experiments["otherExp"],
            "otherExp settings not parsed correctly",
        )

    def test_parse_users(self) -> None:
        settings_text = """
        experiments:
@@ -119,7 +157,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        @User2,lf,otherExp

        """
        prefix = rd.get_runner_prefix(settings_text, ["User1"])
        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
        self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")

    def test_opted_in_user_two_experiments(self) -> None:
@@ -136,7 +174,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        @User2,lf,otherExp

        """
        prefix = rd.get_runner_prefix(settings_text, ["User2"])
        prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")

    @patch("random.uniform", return_value=50)
@@ -154,7 +192,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        @User2,lf,otherExp

        """
        prefix = rd.get_runner_prefix(settings_text, ["User3"])
        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
        self.assertEqual("", prefix, "Runner prefix not correct for user")

    @patch("random.uniform", return_value=10)
@@ -174,7 +212,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
        """

        # User3 is opted out, but is pulled into both experiments by the 10% rollout
        prefix = rd.get_runner_prefix(settings_text, ["User3"])
        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")

    def test_lf_prefix_always_comes_first(self) -> None:
@@ -192,7 +230,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):

        """

        prefix = rd.get_runner_prefix(settings_text, ["User2"])
        prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")

    def test_ignores_commented_users(self) -> None:
@@ -210,7 +248,7 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"])
        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
        self.assertEqual("", prefix, "Runner prefix not correct for user")

    def test_ignores_extra_experiments(self) -> None:
@@ -229,9 +267,44 @@ class TestRunnerDeterminatorGetRunnerPrefix(TestCase):

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"])
        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")

    def test_disables_experiment_on_exception_branches_when_not_explicitly_opted_in(
        self,
    ) -> None:
        settings_text = """
        experiments:
            lf:
                rollout_perc: 100
        ---

        Users:
        @User,lf,otherExp

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
        self.assertEqual("", prefix, "Runner prefix not correct for user")

    def test_allows_experiment_on_exception_branches_when_explicitly_opted_in(
        self,
    ) -> None:
        settings_text = """
        experiments:
            lf:
                rollout_perc: 100
                all_branches: true
        ---

        Users:
        @User,lf,otherExp

        """

        prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
        self.assertEqual("lf.", prefix, "Runner prefix not correct for user")


if __name__ == "__main__":
    main()

.github/scripts/test_trymerge.py (vendored; 35 changes)

@@ -12,7 +12,7 @@ import json
import os
import warnings
from hashlib import sha256
from typing import Any, Dict, List, Optional
from typing import Any, List, Optional
from unittest import main, mock, skip, TestCase
from urllib.error import HTTPError

@@ -24,7 +24,6 @@ from trymerge import (
    find_matching_merge_rule,
    get_classifications,
    get_drci_classifications,
    get_rockset_results,
    gh_get_team_members,
    GitHubPR,
    JobCheckState,
@@ -42,7 +41,6 @@ if "GIT_REMOTE_URL" not in os.environ:
    os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"

GQL_MOCKS = "gql_mocks.json.gz"
ROCKSET_MOCKS = "rockset_mocks.json.gz"
DRCI_MOCKS = "drci_mocks.json.gz"


@@ -77,16 +75,11 @@ def mock_query(
        if err.code == 401 or err.code == 403:
            err_msg = f"If you are seeing this message during workflow run, please make sure to update {file_name}"
            err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with"
            err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN,"
            err_msg += " the rockset api key passed via ROCKSET_API_KEY,"
            err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN"
            err_msg += " and drci api key passed via DRCI_BOT_KEY environment variables"
            if (
                os.getenv("GITHUB_TOKEN") is None
                or os.getenv("ROCKSET_API_KEY") is None
                or os.getenv("DRCI_BOT_KEY") is None
            ):
            if os.getenv("GITHUB_TOKEN") is None or os.getenv("DRCI_BOT_KEY") is None:
                err_msg = (
                    "Failed to update cached queries as GITHUB_TOKEN or ROCKSET_API_KEY or DRCI_BOT_KEY "
                    "Failed to update cached queries as GITHUB_TOKEN or DRCI_BOT_KEY "
                    + "is not defined. "
                    + err_msg
                )
@@ -110,16 +103,6 @@ def mocked_gh_graphql(query: str, **kwargs: Any) -> Any:
    return mock_query(gh_graphql_wrapper, GQL_MOCKS, key_function, query, kwargs)


def mocked_rockset_results(head_sha: str, merge_base: str, num_retries: int = 3) -> Any:
    return mock_query(
        get_rockset_results,
        ROCKSET_MOCKS,
        lambda x, y: f"{x} {y}",
        head_sha,
        merge_base,
    )


def mocked_drci_classifications(pr_num: int, project: str, num_retries: int = 3) -> Any:
    return mock_query(
        get_drci_classifications,
@@ -273,10 +256,6 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
    ]


def empty_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
    return []


class DummyGitRepo(GitRepo):
    def __init__(self) -> None:
        super().__init__(get_git_repo_dir(), get_git_remote_name())
@@ -288,7 +267,6 @@ class DummyGitRepo(GitRepo):
        return "super awsome commit message"


@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch(
    "trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
@@ -604,7 +582,6 @@ class TestTryMerge(TestCase):
            mocked_gh_fetch_merge_base.assert_called_once()


@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
@@ -843,7 +820,7 @@ class TestBypassFailures(TestCase):
        checks = pr.get_checkrun_conclusions()

        # Known flaky failure takes precedence over ignore current (need to set the
        # merge base here to get the results from Rockset, and that categorize the
        # merge base here to get the results from Dr. CI, and that categorize the
        # broken trunk failure too
        checks = get_classifications(
            pr.pr_num,
@@ -929,7 +906,6 @@ class TestBypassFailures(TestCase):
        )


@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch("trymerge.get_drci_classifications", return_value={})
@@ -1008,7 +984,6 @@ class TestBypassFailuresOnSandCastle(TestCase):
        self.assertTrue(len(failed) == 2)


@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(

.github/scripts/trymerge.py (vendored; 51 changes)

@@ -452,8 +452,6 @@ RE_DIFF_REV = re.compile(r"^Differential Revision:.+?(D[0-9]+)", re.MULTILINE)
CIFLOW_LABEL = re.compile(r"^ciflow/.+")
CIFLOW_TRUNK_LABEL = re.compile(r"^ciflow/trunk")
MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
ROCKSET_MERGES_COLLECTION = "merges"
ROCKSET_MERGES_WORKSPACE = "commons"
REMOTE_MAIN_BRANCH = "origin/main"
DRCI_CHECKRUN_NAME = "Dr.CI"
INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
@@ -1180,7 +1178,7 @@ class GitHubPR:
        merge_commit_sha = repo.rev_parse(name=self.default_branch())

        if comment_id and self.pr_num:
            # Finally, upload the record to Rockset. The list of pending and failed
            # Finally, upload the record to s3. The list of pending and failed
            # checks are at the time of the merge
            save_merge_record(
                comment_id=comment_id,
@@ -1202,7 +1200,7 @@ class GitHubPR:
                ignore_current=bool(ignore_current_checks),
            )
        else:
            print("Missing comment ID or PR number, couldn't upload to Rockset")
            print("Missing comment ID or PR number, couldn't upload to s3")

        # Usually Github will see that the commit has "resolves <pr_num>" in the
        # commit message and close the PR, but sometimes it doesn't, leading to
@@ -1481,7 +1479,7 @@ def find_matching_merge_rule(

        # Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here
        # where the list of checks is readily available. These records will be saved into
        # Rockset merge records
        # s3 merge records
        (
            pending_mandatory_checks,
            failed_mandatory_checks,
@@ -1568,7 +1566,7 @@ def save_merge_record(
    This saves the merge records as a json, which can later be uploaded to s3
    """

    # Prepare the record to be written into Rockset
    # Prepare the record to be written into s3
    data = [
        {
            "comment_id": comment_id,
@@ -1590,7 +1588,8 @@ def save_merge_record(
            "ignore_current": ignore_current,
            "error": error,
            # This is a unique identifier for the record for deduping purposes
            # in rockset.  Any unique string would work
            # in Rockset.  Any unique string would work.  This will not be used
            # after we migrate off Rockset
            "_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}",
        }
    ]
@@ -1600,36 +1599,6 @@ def save_merge_record(
        json.dump(data, f)


@retries_decorator(rc=[])
def get_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
    query = f"""
SELECT
    w.name as workflow_name,
    j.id,
    j.name,
    j.conclusion,
    j.completed_at,
    j.html_url,
    j.head_sha,
    j.torchci_classification.captures as failure_captures,
    LENGTH(j.steps) as steps,
FROM
    commons.workflow_job j join commons.workflow_run w on w.id = j.run_id
where
    j.head_sha in ('{head_sha}','{merge_base}')
"""
    try:
        import rockset  # type: ignore[import]

        res = rockset.RocksetClient(
            host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
        ).sql(query)
        return cast(List[Dict[str, Any]], res.results)
    except ModuleNotFoundError:
        print("Could not use RockSet as rocket dependency is missing")
        return []


@retries_decorator()
def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
    """
@@ -2067,7 +2036,7 @@ def categorize_checks(
    pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
    failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []

    # failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
    # failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on s3
    failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)

    # If required_checks is not set or empty, consider all names are relevant
@@ -2126,7 +2095,7 @@ def categorize_checks(
    ):
        failed_checks = failed_checks + flaky_or_broken_trunk

    # The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
    # The list of failed_checks_categorization is returned so that it can be saved into the s3 merge record
    return (pending_checks, failed_checks, failed_checks_categorization)


@@ -2410,7 +2379,7 @@ def main() -> None:
        handle_exception(e)

        if args.comment_id and args.pr_num:
            # Finally, upload the record to Rockset, we don't have access to the
            # Finally, upload the record to s3, we don't have access to the
            # list of pending and failed checks here, but they are not really
            # needed at the moment
            save_merge_record(
@@ -2433,7 +2402,7 @@ def main() -> None:
                error=str(e),
            )
        else:
            print("Missing comment ID or PR number, couldn't upload to Rockset")
            print("Missing comment ID or PR number, couldn't upload to s3")
    finally:
        if not args.check_mergeability:
            gh_remove_label(

.github/scripts/update_runner_determinator.py (vendored; executable file; 31 changes)

@@ -0,0 +1,31 @@
#!/usr/bin/env python3

import re


# Read the contents of runner_determinator.py
with open(".github/scripts/runner_determinator.py") as script_file:
    script_content = script_file.read()

# Indent the script content by 10 spaces to match destination indentation
indented_script_content = "\n".join(
    [" " * 10 + line if line else line for line in script_content.splitlines()]
)

# Read the contents of _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml") as yml_file:
    yml_content = yml_file.read()

# Replace the content between the markers
new_yml_content = re.sub(
    r"(cat <<EOF > runner_determinator.py\n)(.*?)(\n\s+EOF)",
    lambda match: match.group(1) + indented_script_content + match.group(3),
    yml_content,
    flags=re.DOTALL,
)

# Save the modified content back to _runner-determinator.yml
with open(".github/workflows/_runner-determinator.yml", "w") as yml_file:
    yml_file.write(new_yml_content)

print("Updated _runner-determinator.yml with the contents of runner_determinator.py")
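
A toy run of the same substitution, on a hypothetical miniature workflow body, showing how re.sub with re.DOTALL swaps only the heredoc content between cat <<EOF > runner_determinator.py and the closing EOF while leaving the surrounding YAML intact:

import re

# Hypothetical miniature stand-in for _runner-determinator.yml.
yml = (
    "      run: |\n"
    "          cat <<EOF > runner_determinator.py\n"
    "          old_body()\n"
    "          EOF\n"
)

new_yml = re.sub(
    r"(cat <<EOF > runner_determinator.py\n)(.*?)(\n\s+EOF)",
    lambda m: m.group(1) + "          new_body()" + m.group(3),
    yml,
    flags=re.DOTALL,
)

assert "new_body()" in new_yml and "old_body()" not in new_yml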

@@ -68,6 +68,7 @@ jobs:
    needs: get-label-type
    with:!{{ upload.binary_env_as_input(config) }}
      {%- if "aarch64" in build_environment %}
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      {%- elif "s390x" in build_environment %}
@@ -102,6 +103,7 @@ jobs:
      build_name: !{{ config["build_name"] }}
      build_environment: !{{ build_environment }}
      {%- if "aarch64" in build_environment %}
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
      {%- elif "s390x" in build_environment %}

.github/workflows/_bazel-build-test.yml (vendored; 8 changes)

@@ -91,14 +91,14 @@ jobs:
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      - name: Check if in a ARC runner
      - name: Check if in a container runner
        shell: bash
        id: check_arc_runner
        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
        id: check_container_runner
        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
        if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
        if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

      - name: Output disk space left
        run: |
 | 
			
		||||
							
								
								
									
										64
									
								
								.github/workflows/_linux-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										64
									
								
								.github/workflows/_linux-build.yml
									
									
									
									
										vendored
									
									
								
							@ -109,6 +109,7 @@ jobs:
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

@ -118,13 +119,16 @@ jobs:
      # checkout. In other cases you should prefer a local checkout.
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          no-sudo: ${{ inputs.build-environment == 'linux-s390x-binary-manywheel' }}

      - name: Setup Linux
        uses: ./.github/actions/setup-linux
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'

      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v3
        if: ${{ inputs.aws-role-to-assume != '' }}
        if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
        with:
          role-to-assume: ${{ inputs.aws-role-to-assume }}
          role-session-name: gha-linux-build

@ -133,11 +137,13 @@ jobs:
      - name: Calculate docker image
        id: calculate-docker-image
        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Use following to pull public copy of the image
        id: print-ghcr-mirror
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        env:
          ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
        shell: bash

@ -147,6 +153,7 @@ jobs:

      - name: Pull docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -174,6 +181,7 @@ jobs:
      - name: Download pytest cache
        uses: ./.github/actions/pytest-cache-download
        continue-on-error: true
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          cache_dir: .pytest_cache
          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

@ -195,6 +203,7 @@ jobs:
          PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
          TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
          DOCKER_IMAGE_S390X: ${{ inputs.docker-image-name }}
          XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
          DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
          OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}

@ -202,7 +211,21 @@ jobs:
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
        run: |
          if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
            JENKINS_USER=
            USED_IMAGE="${DOCKER_IMAGE_S390X}"

            # since some steps are skipped on s390x, if they are necessary, run them here
            env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
            env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
          else
            JENKINS_USER="--user jenkins"
            USED_IMAGE="${DOCKER_IMAGE}"
          fi

          # detached container should get cleaned up by teardown_ec2_linux
          # Used for JENKINS_USER, which can be empty
          # shellcheck disable=SC2086
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e MAX_JOBS="$(nproc --ignore=2)" \

@ -225,10 +248,10 @@ jobs:
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
            --user jenkins \
            ${JENKINS_USER} \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
            "${USED_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
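The hunk above concentrates the s390x special-casing in two shell variables: s390x builds run as root (empty JENKINS_USER) in the plain upstream image, while every other build runs as the jenkins user in the ECR-built image. A minimal Python sketch of the same selection (the function name is illustrative; the env var names come from the workflow):

    import os

    def docker_run_args(build_environment: str) -> list[str]:
        # s390x: root user, unmodified upstream image (DOCKER_IMAGE_S390X).
        # everything else: jenkins user, ECR-built image (DOCKER_IMAGE).
        if "s390x" in build_environment:
            user_args = []  # JENKINS_USER is left empty
            image = os.environ["DOCKER_IMAGE_S390X"]
        else:
            user_args = ["--user", "jenkins"]  # JENKINS_USER="--user jenkins"
            image = os.environ["DOCKER_IMAGE"]
        return ["docker", "run", "--detach", *user_args, image]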
@ -239,7 +262,7 @@ jobs:

      - name: Store PyTorch Build Artifacts on S3
        uses: seemethere/upload-artifact-s3@v5
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}
          retention-days: 14

@ -249,7 +272,7 @@ jobs:

      - name: Store PyTorch Build Artifacts on S3 for split build
        uses: seemethere/upload-artifact-s3@v5
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}-experimental-split-build
          retention-days: 14

@ -257,8 +280,26 @@ jobs:
          path: artifacts.zip
          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Store PyTorch Build Artifacts for s390x
        uses: actions/upload-artifact@v3
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}
          retention-days: 14
          if-no-files-found: error
          path: artifacts.zip

      - name: Store PyTorch Build Artifacts for s390x for split build
        uses: actions/upload-artifact@v3
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}-experimental-split-build
          retention-days: 14
          if-no-files-found: error
          path: artifacts.zip

      - name: Upload sccache stats
        if: steps.build.outcome != 'skipped'
        if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-prefix: |

@ -270,4 +311,13 @@ jobs:

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always()
        if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'

      - name: Cleanup docker
        if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
        shell: bash
        run: |
          # on s390x stop the container for clean worker stop
          # ignore expansion of "docker ps -q" since it could be empty
          # shellcheck disable=SC2046
          docker stop $(docker ps -q) || true
.github/workflows/_linux-test.yml (27 changes, vendored)

@ -114,22 +114,32 @@ jobs:
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      - name: Check if in a ARC runner
      - name: Check if in a container runner
        shell: bash
        id: check_arc_runner
        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
        id: check_container_runner
        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        id: install-nvidia-driver
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

      - name: Setup GPU_FLAG for docker run
        id: setup-gpu-flag
        run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}

      - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
        id: setup-sscache-port-flag
        run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
        if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}

      - name: Lock NVIDIA A100 40GB Frequency
        run: |
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          nvidia-smi
        if: contains(matrix.runner, 'a100')
        if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

      - name: Start monitoring script
        id: monitor-script
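Two of the new steps only fire on container runners: containers cannot install the host nvidia driver, so they receive GPU_FLAG directly, and each runner derives a private sccache server port from its UID so that several runner pods on one host do not collide on sccache's default port. A minimal Python sketch of the port derivation (the 4226 offset is taken from the step above; RUNNER_UID is assumed to be provided by the runner environment):

    import os

    # Mirrors: SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))
    runner_uid = int(os.environ.get("RUNNER_UID", "0"))
    sccache_port = runner_uid + 4226
    print(f"-e SCCACHE_SERVER_PORT={sccache_port}")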
@ -208,6 +218,7 @@ jobs:
          NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
          TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
          SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
          DOCKER_IMAGE: ${{ inputs.docker-image }}

@ -218,6 +229,7 @@ jobs:
          DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}

        run: |
          set -x

@ -236,6 +248,7 @@ jobs:
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e GITHUB_ACTIONS \

@ -265,6 +278,7 @@ jobs:
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e SCCACHE_S3_KEY_PREFIX \
            -e XLA_CUDA \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \

@ -274,6 +288,7 @@ jobs:
            -e HUGGING_FACE_HUB_TOKEN \
            -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
            -e DASHBOARD_TAG \
            -e IS_A100_RUNNER \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \

@ -343,7 +358,7 @@ jobs:

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always()
        if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'

      # NB: We are currently having an intermittent GPU-related issue on G5 runners with
      # A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does
.github/workflows/_mac-test-mps.yml (22 changes, vendored)

@ -88,6 +88,13 @@ jobs:
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
          pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt

      - name: Get workflow job id
        id: get-job-id
        uses: ./.github/actions/get-workflow-job-id
        if: always()
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Install PyTorch and run MPS tests
        id: test
        env:

@ -103,6 +110,14 @@ jobs:
          NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }}
          NO_TD: ${{ needs.filter.outputs.ci-no-td }}
          PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
          GITHUB_REPOSITORY: ${{ github.repository }}
          GITHUB_WORKFLOW: ${{ github.workflow }}
          GITHUB_JOB: ${{ github.job }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
          GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
          JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
          REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
        run: |
          # shellcheck disable=SC1090

@ -144,13 +159,6 @@ jobs:
        run: |
          cat test/**/*_toprint.log || true

      - name: Get workflow job id
        id: get-job-id
        uses: ./.github/actions/get-workflow-job-id
        if: always()
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Upload test artifacts
        uses: ./.github/actions/upload-test-artifacts
        if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
.github/workflows/_runner-determinator.yml (67 changes, vendored)

@ -59,6 +59,10 @@ jobs:
          cat <<EOF > runner_determinator.py
          # flake8: noqa: G004

          # Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
          #       must be kept in sync. You can do it easily by running the following command:
          #           python .github/scripts/update_runner_determinator.py

          """
          This runner determinator is used to determine which set of runners to run a
          GitHub job on. It uses the first comment of a GitHub issue (by default

@ -138,6 +142,9 @@ jobs:
              rollout_perc: float = (
                  0  # Percentage of workflows to experiment on when user is not opted-in.
              )
              all_branches: bool = (
                  False  # If True, the experiment is also enabled on the exception branches
              )

              # Add more fields as needed

@ -271,7 +278,7 @@ jobs:

          def is_exception_branch(branch: str) -> bool:
              """
              Branches that get opted out of all experiments and should always use Meta runners
              Branches that get opted out of experiments by default, until they're explicitly enabled.
              """
              return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}

@ -397,7 +404,10 @@ jobs:

          def get_runner_prefix(
              rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
              rollout_state: str,
              workflow_requestors: Iterable[str],
              branch: str,
              is_canary: bool = False,
          ) -> str:
              settings = parse_settings(rollout_state)
              user_optins = parse_users(rollout_state)

@ -407,6 +417,12 @@ jobs:
              for experiment_name, experiment_settings in settings.experiments.items():
                  enabled = False

                  if not experiment_settings.all_branches and is_exception_branch(branch):
                      log.info(
                          f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
                      )
                      continue

                  # Is any workflow_requestor opted in to this experiment?
                  opted_in_users = [
                      requestor
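The new all_branches field and the branch parameter move the exception-branch decision from main() into get_runner_prefix, so individual experiments can opt in to running on main/nightly/release/landchecks. A self-contained sketch of that gating (the dataclass and logger are stand-ins for the script's own definitions):

    import logging
    from dataclasses import dataclass

    log = logging.getLogger(__name__)

    @dataclass
    class Experiment:
        rollout_perc: float = 0
        all_branches: bool = False  # must be set for the experiment to run on exception branches

    def is_exception_branch(branch: str) -> bool:
        return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}

    def enabled_experiments(experiments: dict[str, Experiment], branch: str) -> list[str]:
        # Mirrors the new per-experiment gate added to get_runner_prefix.
        names = []
        for name, settings in experiments.items():
            if not settings.all_branches and is_exception_branch(branch):
                log.info(f"Branch {branch} is an exception branch. Not enabling experiment {name}.")
                continue
            names.append(name)
        return names

For example, enabled_experiments({"lf": Experiment(all_branches=True), "amz2023": Experiment()}, "main") returns ["lf"]: on an exception branch, only an experiment that explicitly opted in survives.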
@ -466,35 +482,34 @@
          def main() -> None:
              args = parse_args()

              if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
                  log.info(
                      f"Exception branch: '{args.github_branch}', using Meta runners and no experiments."
              runner_label_prefix = DEFAULT_LABEL_PREFIX

              try:
                  rollout_state = get_rollout_state_from_issue(
                      args.github_token, args.github_issue_repo, args.github_issue
                  )
                  runner_label_prefix = DEFAULT_LABEL_PREFIX
              else:
                  try:
                      rollout_state = get_rollout_state_from_issue(
                          args.github_token, args.github_issue_repo, args.github_issue
                      )

                      username = get_potential_pr_author(
                          args.github_token,
                          args.github_repo,
                          args.github_actor,
                          args.github_ref_type,
                          args.github_branch,
                      )
                  username = get_potential_pr_author(
                      args.github_token,
                      args.github_repo,
                      args.github_actor,
                      args.github_ref_type,
                      args.github_branch,
                  )

                      is_canary = args.github_repo == "pytorch/pytorch-canary"
                  is_canary = args.github_repo == "pytorch/pytorch-canary"

                      runner_label_prefix = get_runner_prefix(
                          rollout_state, (args.github_issue_owner, username), is_canary
                      )
                  runner_label_prefix = get_runner_prefix(
                      rollout_state,
                      (args.github_issue_owner, username),
                      args.github_branch,
                      is_canary,
                  )

                  except Exception as e:
                      log.error(
                          f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
                      )
              except Exception as e:
                  log.error(
                      f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
                  )

              set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
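The restructured main() sets the Meta-runner default up front and wraps the whole lookup in a single try/except, so any failure falls back to the default prefix instead of only exception branches short-circuiting. A condensed, runnable sketch of that control flow (the two callables stand in for get_rollout_state_from_issue and get_runner_prefix):

    import logging

    log = logging.getLogger(__name__)
    DEFAULT_LABEL_PREFIX = ""  # Meta runners

    def choose_runner_prefix(fetch_rollout_state, compute_prefix) -> str:
        # Default first: any exception below leaves us on Meta runners.
        runner_label_prefix = DEFAULT_LABEL_PREFIX
        try:
            rollout_state = fetch_rollout_state()
            runner_label_prefix = compute_prefix(rollout_state)
        except Exception as e:
            log.error(f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}")
        return runner_label_prefix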
.github/workflows/_win-test.yml (2 changes, vendored)

@ -189,7 +189,7 @@ jobs:
        run: |
          pushd "${PYTORCH_FINAL_PACKAGE_DIR}"
          # shellcheck disable=SC2046,SC2102
          python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.12.1
          python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.13.0
          popd

          .ci/pytorch/win-test.sh
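The only change here bumps the optree pin from 0.12.1 to 0.13.0; the shell idiom installs the freshly built wheel together with its opt-einsum and optree extras while pinning optree. A rough Python equivalent of that line (the glob and pip invocation are illustrative):

    import glob
    import subprocess
    import sys

    # Install the built wheel with extras and a pinned optree, mirroring
    # python3 -mpip install $(echo *.whl)[opt-einsum,optree] optree==0.13.0
    wheel = glob.glob("*.whl")[0]
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        f"{wheel}[opt-einsum,optree]", "optree==0.13.0",
    ])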
.github/workflows/build-conda-images.yml (2 changes, vendored)

@ -32,7 +32,7 @@ concurrency:
jobs:
  build-docker:
    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
    runs-on: am2.linux.9xlarge.ephemeral
    runs-on: linux.9xlarge.ephemeral
    strategy:
      matrix:
        cuda_version: ["11.8", "12.1", "12.4", "cpu"]
.github/workflows/build-manywheel-images.yml (6 changes, vendored)

@ -45,7 +45,7 @@ jobs:
  build-docker-cuda:
    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
    needs: get-label-type
    runs-on: "${{ needs.get-label-type.outputs.label-type }}am2.linux.9xlarge.ephemeral"
    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
    strategy:
      matrix:
        cuda_version: ["12.4", "12.1", "11.8"]

@ -156,7 +156,7 @@ jobs:
  build-docker-rocm:
    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
    needs: get-label-type
    runs-on: "${{ needs.get-label-type.outputs.label-type }}am2.linux.9xlarge.ephemeral"
    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
    strategy:
      matrix:
        rocm_version: ["6.1", "6.2"]

@ -192,7 +192,7 @@ jobs:
  build-docker-cpu:
    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
    needs: get-label-type
    runs-on: "${{ needs.get-label-type.outputs.label-type }}am2.linux.9xlarge.ephemeral"
    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
.github/workflows/build-triton-wheel.yml (7 changes, vendored)

@ -43,7 +43,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
        py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
        py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
        device: ["cuda", "rocm", "xpu"]
        include:
          - device: "rocm"

@ -91,9 +91,6 @@ jobs:

          # Determine python executable for given version
          case $PY_VERS in
          3.8)
            PYTHON_EXECUTABLE=/opt/python/cp38-cp38/bin/python
            ;;
          3.9)
            PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python
            ;;
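Python 3.8 leaves both wheel matrices, and its arm of the case statement mapping a version string to the manylinux interpreter path goes with it. The surviving arms all follow the standard CPython layout inside manylinux images; a small sketch of that rule (illustrative only, and it does not cover free-threaded tags such as 3.13t):

    def manylinux_python(py_vers: str) -> str:
        # "3.9" -> /opt/python/cp39-cp39/bin/python, as in the case statement.
        tag = "cp" + py_vers.replace(".", "")
        return f"/opt/python/{tag}-{tag}/bin/python"

    assert manylinux_python("3.9") == "/opt/python/cp39-cp39/bin/python"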
@ -214,7 +211,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
        py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
        py_vers: [ "3.9", "3.10", "3.11", "3.12" ]
    timeout-minutes: 40
    env:
      DOCKER_IMAGE: pytorch/conda-builder:cpu
.github/workflows/check-labels.yml (3 changes, vendored)

@ -30,6 +30,9 @@ concurrency:

jobs:
  check-labels:
    permissions:
      contents: read
      pull-requests: write
    name: Check labels
    if: github.repository_owner == 'pytorch'
    runs-on: linux.20_04.4x
.github/workflows/docker-builds.yml (5 changes, vendored)

@ -67,6 +67,7 @@ jobs:
          pytorch-linux-jammy-py3.12-halide,
          pytorch-linux-jammy-xpu-2024.0-py3,
          pytorch-linux-jammy-py3-clang15-asan,
          pytorch-linux-jammy-py3-clang18-asan,
          pytorch-linux-focal-py3-clang10-onnx,
          pytorch-linux-focal-linter,
          pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter,

@ -78,7 +79,9 @@ jobs:
          - docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
            runner: linux.arm64.m7g.4xlarge
            timeout-minutes: 600
    runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
    # Docker uploads fail from LF runners, see https://github.com/pytorch/pytorch/pull/137358
    # runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}"
    runs-on: "${{ matrix.runner }}"
    env:
      DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${{ matrix.docker-image-name }}
    steps:
.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml (12 changes, generated, vendored)

@ -60,6 +60,7 @@ jobs:
      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
      use_split_build: False
      DESIRED_PYTHON: "3.9"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_9-cpu-aarch64

@ -86,6 +87,7 @@ jobs:
      DESIRED_PYTHON: "3.9"
      build_name: manywheel-py3_9-cpu-aarch64
      build_environment: linux-aarch64-binary-manywheel
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
    secrets:

@ -130,6 +132,7 @@ jobs:
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.9"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_9-cuda-aarch64

@ -177,6 +180,7 @@ jobs:
      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
      use_split_build: False
      DESIRED_PYTHON: "3.10"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_10-cpu-aarch64

@ -203,6 +207,7 @@ jobs:
      DESIRED_PYTHON: "3.10"
      build_name: manywheel-py3_10-cpu-aarch64
      build_environment: linux-aarch64-binary-manywheel
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
    secrets:

@ -247,6 +252,7 @@ jobs:
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.10"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_10-cuda-aarch64

@ -294,6 +300,7 @@ jobs:
      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
      use_split_build: False
      DESIRED_PYTHON: "3.11"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_11-cpu-aarch64

@ -320,6 +327,7 @@ jobs:
      DESIRED_PYTHON: "3.11"
      build_name: manywheel-py3_11-cpu-aarch64
      build_environment: linux-aarch64-binary-manywheel
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
    secrets:

@ -364,6 +372,7 @@ jobs:
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.11"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_11-cuda-aarch64

@ -411,6 +420,7 @@ jobs:
      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
      use_split_build: False
      DESIRED_PYTHON: "3.12"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_12-cpu-aarch64

@ -437,6 +447,7 @@ jobs:
      DESIRED_PYTHON: "3.12"
      build_name: manywheel-py3_12-cpu-aarch64
      build_environment: linux-aarch64-binary-manywheel
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
    secrets:

@ -481,6 +492,7 @@ jobs:
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.12"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      build_name: manywheel-py3_12-cuda-aarch64
.github/workflows/generated-linux-binary-manywheel-nightly.yml (350 changes, generated, vendored)

@ -3324,3 +3324,353 @@ jobs:
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

  manywheel-py3_13t-cpu-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build_name: manywheel-py3_13t-cpu
      build_environment: linux-binary-manywheel
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cpu-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
      - manywheel-py3_13t-cpu-build
      - get-label-type
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cpu
      build_environment: linux-binary-manywheel
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.4xlarge
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cpu-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_13t-cpu-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cpu
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml
  manywheel-py3_13t-cpu-cxx11-abi-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu-cxx11-abi
      GPU_ARCH_TYPE: cpu-cxx11-abi
      DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build_name: manywheel-py3_13t-cpu-cxx11-abi
      build_environment: linux-binary-manywheel
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cpu-cxx11-abi-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
      - manywheel-py3_13t-cpu-cxx11-abi-build
      - get-label-type
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu-cxx11-abi
      GPU_ARCH_TYPE: cpu-cxx11-abi
      DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cpu-cxx11-abi
      build_environment: linux-binary-manywheel
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.4xlarge
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cpu-cxx11-abi-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_13t-cpu-cxx11-abi-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu-cxx11-abi
      GPU_ARCH_TYPE: cpu-cxx11-abi
      DOCKER_IMAGE: pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-main
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cpu-cxx11-abi
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

  manywheel-py3_13t-cuda11_8-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu118
      GPU_ARCH_VERSION: 11.8
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build_name: manywheel-py3_13t-cuda11_8
      build_environment: linux-binary-manywheel
      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
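PYTORCH_EXTRA_INSTALL_REQUIREMENTS above is a |-separated list of PEP 508 requirement strings, each carrying an environment marker that restricts the pin to x86_64 Linux. A small sketch of how one entry parses (using the packaging library, which is an assumption here, not something the workflow itself uses):

    from packaging.requirements import Requirement

    entry = ("nvidia-cudnn-cu11==9.1.0.70; "
             "platform_system == 'Linux' and platform_machine == 'x86_64'")
    req = Requirement(entry)
    print(req.name)       # nvidia-cudnn-cu11
    print(req.specifier)  # ==9.1.0.70
    # The marker keeps the pin from applying on non-Linux / non-x86_64 installs.
    print(req.marker.evaluate({"platform_system": "Linux",
                               "platform_machine": "x86_64"}))  # True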
  manywheel-py3_13t-cuda11_8-test:  # Testing
 | 
			
		||||
    if: ${{ github.repository_owner == 'pytorch' }}
 | 
			
		||||
    needs:
 | 
			
		||||
      - manywheel-py3_13t-cuda11_8-build
 | 
			
		||||
      - get-label-type
 | 
			
		||||
    uses: ./.github/workflows/_binary-test-linux.yml
 | 
			
		||||
    with:
 | 
			
		||||
      PYTORCH_ROOT: /pytorch
 | 
			
		||||
      BUILDER_ROOT: /builder
 | 
			
		||||
      PACKAGE_TYPE: manywheel
 | 
			
		||||
      # TODO: This is a legacy variable that we eventually want to get rid of in
 | 
			
		||||
      #       favor of GPU_ARCH_VERSION
 | 
			
		||||
      DESIRED_CUDA: cu118
 | 
			
		||||
      GPU_ARCH_VERSION: 11.8
 | 
			
		||||
      GPU_ARCH_TYPE: cuda
 | 
			
		||||
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
 | 
			
		||||
      use_split_build: False
 | 
			
		||||
      DESIRED_PYTHON: "3.13t"
 | 
			
		||||
      build_name: manywheel-py3_13t-cuda11_8
 | 
			
		||||
      build_environment: linux-binary-manywheel
 | 
			
		||||
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 | 
			
		||||
      runs_on: linux.4xlarge.nvidia.gpu
 | 
			
		||||
    secrets:
 | 
			
		||||
      github-token: ${{ secrets.GITHUB_TOKEN }}
 | 
			
		||||
  manywheel-py3_13t-cuda11_8-upload:  # Uploading
 | 
			
		||||
    if: ${{ github.repository_owner == 'pytorch' }}
 | 
			
		||||
    permissions:
 | 
			
		||||
      id-token: write
 | 
			
		||||
      contents: read
 | 
			
		||||
    needs: manywheel-py3_13t-cuda11_8-test
 | 
			
		||||
    with:
 | 
			
		||||
      PYTORCH_ROOT: /pytorch
 | 
			
		||||
      BUILDER_ROOT: /builder
 | 
			
		||||
      PACKAGE_TYPE: manywheel
 | 
			
		||||
      # TODO: This is a legacy variable that we eventually want to get rid of in
 | 
			
		||||
      #       favor of GPU_ARCH_VERSION
 | 
			
		||||
      DESIRED_CUDA: cu118
 | 
			
		||||
      GPU_ARCH_VERSION: 11.8
 | 
			
		||||
      GPU_ARCH_TYPE: cuda
 | 
			
		||||
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
 | 
			
		||||
      use_split_build: False
 | 
			
		||||
      DESIRED_PYTHON: "3.13t"
 | 
			
		||||
      build_name: manywheel-py3_13t-cuda11_8
 | 
			
		||||
    secrets:
 | 
			
		||||
      github-token: ${{ secrets.GITHUB_TOKEN }}
 | 
			
		||||
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
 | 
			
		||||
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
 | 
			
		||||
    uses: ./.github/workflows/_binary-upload.yml
 | 
			
		||||
 | 
			
		||||
  manywheel-py3_13t-cuda12_1-build:
 | 
			
		||||
    if: ${{ github.repository_owner == 'pytorch' }}
 | 
			
		||||
    uses: ./.github/workflows/_binary-build-linux.yml
 | 
			
		||||
    needs: get-label-type
 | 
			
		||||
    with:
 | 
			
		||||
      PYTORCH_ROOT: /pytorch
 | 
			
		||||
      BUILDER_ROOT: /builder
 | 
			
		||||
      PACKAGE_TYPE: manywheel
 | 
			
		||||
      # TODO: This is a legacy variable that we eventually want to get rid of in
 | 
			
		||||
      #       favor of GPU_ARCH_VERSION
 | 
			
		||||
      DESIRED_CUDA: cu121
 | 
			
		||||
      GPU_ARCH_VERSION: 12.1
 | 
			
		||||
      GPU_ARCH_TYPE: cuda
 | 
			
		||||
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
 | 
			
		||||
      use_split_build: False
 | 
			
		||||
      DESIRED_PYTHON: "3.13t"
 | 
			
		||||
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 | 
			
		||||
      build_name: manywheel-py3_13t-cuda12_1
 | 
			
		||||
      build_environment: linux-binary-manywheel
 | 
			
		||||
      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
 | 
			
		||||
    secrets:
 | 
			
		||||
      github-token: ${{ secrets.GITHUB_TOKEN }}
 | 
			
		||||
  manywheel-py3_13t-cuda12_1-test:  # Testing
 | 
			
		||||
    if: ${{ github.repository_owner == 'pytorch' }}
 | 
			
		||||
    needs:
 | 
			
		||||
      - manywheel-py3_13t-cuda12_1-build
 | 
			
		||||
      - get-label-type
 | 
			
		||||
    uses: ./.github/workflows/_binary-test-linux.yml
 | 
			
		||||
    with:
 | 
			
		||||
      PYTORCH_ROOT: /pytorch
 | 
			
		||||
      BUILDER_ROOT: /builder
 | 
			
		||||
      PACKAGE_TYPE: manywheel
 | 
			
		||||
      # TODO: This is a legacy variable that we eventually want to get rid of in
 | 
			
		||||
      #       favor of GPU_ARCH_VERSION
 | 
			
		||||
      DESIRED_CUDA: cu121
 | 
			
		||||
      GPU_ARCH_VERSION: 12.1
 | 
			
		||||
      GPU_ARCH_TYPE: cuda
 | 
			
		||||
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
 | 
			
		||||
      use_split_build: False
 | 
			
		||||
      DESIRED_PYTHON: "3.13t"
 | 
			
		||||
      build_name: manywheel-py3_13t-cuda12_1
 | 
			
		||||
      build_environment: linux-binary-manywheel
 | 
			
		||||
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 | 
			
		||||
      runs_on: linux.4xlarge.nvidia.gpu
 | 
			
		||||
    secrets:
 | 
			
		||||
      github-token: ${{ secrets.GITHUB_TOKEN }}
 | 
			
		||||
  manywheel-py3_13t-cuda12_1-upload:  # Uploading
 | 
			
		||||
    if: ${{ github.repository_owner == 'pytorch' }}
 | 
			
		||||
    permissions:
 | 
			
		||||
      id-token: write
 | 
			
		||||
      contents: read
 | 
			
		||||
    needs: manywheel-py3_13t-cuda12_1-test
 | 
			
		||||
    with:
 | 
			
		||||
      PYTORCH_ROOT: /pytorch
 | 
			
		||||
      BUILDER_ROOT: /builder
 | 
			
		||||
      PACKAGE_TYPE: manywheel
 | 
			
		||||
      # TODO: This is a legacy variable that we eventually want to get rid of in
 | 
			
		||||
      #       favor of GPU_ARCH_VERSION
 | 
			
		||||
      DESIRED_CUDA: cu121
 | 
			
		||||
      GPU_ARCH_VERSION: 12.1
 | 
			
		||||
      GPU_ARCH_TYPE: cuda
 | 
			
		||||
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
 | 
			
		||||
      use_split_build: False
 | 
			
		||||
      DESIRED_PYTHON: "3.13t"
 | 
			
		||||
      build_name: manywheel-py3_13t-cuda12_1
 | 
			
		||||
    secrets:
 | 
			
		||||
      github-token: ${{ secrets.GITHUB_TOKEN }}
 | 
			
		||||
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
 | 
			
		||||
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
 | 
			
		||||
    uses: ./.github/workflows/_binary-upload.yml
 | 
			
		||||
 | 
			
		||||
  manywheel-py3_13t-cuda12_4-build:
 | 
			
		||||
    if: ${{ github.repository_owner == 'pytorch' }}
 | 
			
		||||
    uses: ./.github/workflows/_binary-build-linux.yml
 | 
			
		||||
    needs: get-label-type
 | 
			
		||||
    with:
 | 
			
		||||
      PYTORCH_ROOT: /pytorch
 | 
			
		||||
      BUILDER_ROOT: /builder
 | 
			
		||||
      PACKAGE_TYPE: manywheel
 | 
			
		||||
      # TODO: This is a legacy variable that we eventually want to get rid of in
 | 
			
		||||
      #       favor of GPU_ARCH_VERSION
 | 
			
		||||
      DESIRED_CUDA: cu124
 | 
			
		||||
      GPU_ARCH_VERSION: 12.4
 | 
			
		||||
      GPU_ARCH_TYPE: cuda
 | 
			
		||||
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
 | 
			
		||||
      use_split_build: False
 | 
			
		||||
      DESIRED_PYTHON: "3.13t"
 | 
			
		||||
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 | 
			
		||||
      build_name: manywheel-py3_13t-cuda12_4
 | 
			
		||||
      build_environment: linux-binary-manywheel
 | 
			
		||||
      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
 | 
			
		||||
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda12_4-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
      - manywheel-py3_13t-cuda12_4-build
      - get-label-type
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda12_4
      build_environment: linux-binary-manywheel
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.4xlarge.nvidia.gpu
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda12_4-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_13t-cuda12_4-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
      use_split_build: False
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda12_4
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

.github/workflows/generated-linux-binary-manywheel-split-nightly.yml (generated, vendored; 280 lines changed)
@ -1514,3 +1514,283 @@ jobs:
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

  manywheel-py3_13t-cuda11_8-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu118
      GPU_ARCH_VERSION: 11.8
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build_name: manywheel-py3_13t-cuda11_8
      build_environment: linux-binary-manywheel-split
      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda11_8-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
      - manywheel-py3_13t-cuda11_8-build
      - get-label-type
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu118
      GPU_ARCH_VERSION: 11.8
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda11_8
      build_environment: linux-binary-manywheel-split
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.4xlarge.nvidia.gpu
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda11_8-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_13t-cuda11_8-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu118
      GPU_ARCH_VERSION: 11.8
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda11_8
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

  manywheel-py3_13t-cuda12_1-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu121
      GPU_ARCH_VERSION: 12.1
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build_name: manywheel-py3_13t-cuda12_1
      build_environment: linux-binary-manywheel-split
      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda12_1-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
      - manywheel-py3_13t-cuda12_1-build
      - get-label-type
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu121
      GPU_ARCH_VERSION: 12.1
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda12_1
      build_environment: linux-binary-manywheel-split
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.4xlarge.nvidia.gpu
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda12_1-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_13t-cuda12_1-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu121
      GPU_ARCH_VERSION: 12.1
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda12_1
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

  manywheel-py3_13t-cuda12_4-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build_name: manywheel-py3_13t-cuda12_4
      build_environment: linux-binary-manywheel-split
      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda12_4-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
      - manywheel-py3_13t-cuda12_4-build
      - get-label-type
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda12_4
      build_environment: linux-binary-manywheel-split
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.4xlarge.nvidia.gpu
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cuda12_4-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_13t-cuda12_4-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
      DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cuda12_4
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

  manywheel-py3_13t-cpu-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build_name: manywheel-py3_13t-cpu
      build_environment: linux-binary-manywheel-split
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cpu-test:  # Testing
    if: ${{ github.repository_owner == 'pytorch' }}
    needs:
      - manywheel-py3_13t-cpu-build
      - get-label-type
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cpu
      build_environment: linux-binary-manywheel-split
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.4xlarge
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_13t-cpu-upload:  # Uploading
    if: ${{ github.repository_owner == 'pytorch' }}
    permissions:
      id-token: write
      contents: read
    needs: manywheel-py3_13t-cpu-test
    with:
      PYTORCH_ROOT: /pytorch
      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
      DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
      use_split_build: True
      DESIRED_PYTHON: "3.13t"
      build_name: manywheel-py3_13t-cpu
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

@ -5,9 +5,7 @@ on:
    # - cron: 0 7 * * 1-6
    # - cron: 0 7 * * 0
    # Does not perform max_autotune on CPU, so skip the weekly run setup
    # Run 6 times every day to see if the perf instability can be reproduced
    # Will change this back
    - cron: 0 */4 * * *
    - cron: 0 7 * * *
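    # NB: '0 7 * * *' fires once a day at 07:00 UTC; the aarch64 inductor test
    # job later in this diff gates on the same schedule string.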
  # NB: GitHub has an upper limit of 10 inputs here
  workflow_dispatch:
    inputs:
@ -116,7 +114,7 @@ jobs:
    name: linux-jammy-aarch64-py3.10-inductor
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-jammy-aarch64-py3_10-inductor-build
    if: github.event.schedule == '0 */4 * * *'
    if: github.event.schedule == '0 7 * * *'
    with:
      build-environment: linux-jammy-aarch64-py3.10
      # Turn off dynamic-shapes and aotinductor tests for now, to have faster iteration for debugging perf instability.

.github/workflows/inductor-rocm.yml (vendored; 18 lines changed)
@ -31,13 +31,13 @@ jobs:
      curr_branch: ${{ github.head_ref || github.ref_name }}
      curr_ref_type: ${{ github.ref_type }}

  linux-focal-rocm6_1-py3_10-inductor-build:
    name: rocm6.1-py3.10-inductor
  linux-focal-rocm6_2-py3_10-inductor-build:
    name: rocm6.2-py3.10-inductor
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-rocm6.1-py3.10
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image-name: pytorch-linux-focal-rocm-n-py3
      test-matrix: |
        { include: [
@ -45,14 +45,14 @@ jobs:
          { config: "inductor", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.2" },
        ]}

  linux-focal-rocm6_1-py3_10-inductor-test:
  linux-focal-rocm6_2-py3_10-inductor-test:
    permissions:
      id-token: write
      contents: read
    name: rocm6.1-py3.10-inductor
    name: rocm6.2-py3.10-inductor
    uses: ./.github/workflows/_rocm-test.yml
    needs: linux-focal-rocm6_1-py3_10-inductor-build
    needs: linux-focal-rocm6_2-py3_10-inductor-build
    with:
      build-environment: linux-focal-rocm6.1-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-inductor-build.outputs.docker-image }}
      test-matrix:  ${{ needs.linux-focal-rocm6_1-py3_10-inductor-build.outputs.test-matrix }}
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-inductor-build.outputs.docker-image }}
      test-matrix:  ${{ needs.linux-focal-rocm6_2-py3_10-inductor-build.outputs.test-matrix }}

.github/workflows/inductor.yml (vendored; 44 lines changed)
@ -58,8 +58,7 @@ jobs:
          { config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
          { config: "inductor_cpp_wrapper_abi_compatible", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
        ]}
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    secrets: inherit

  linux-focal-cuda12_1-py3_10-gcc9-inductor-test:
    name: cuda12.1-py3.10-gcc9-sm86
@ -69,8 +68,7 @@ jobs:
      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    secrets: inherit

  linux-focal-cuda12_1-py3_12-gcc9-inductor-build:
    name: cuda12.1-py3.12-gcc9-sm86
@ -86,6 +84,7 @@ jobs:
          { config: "inductor", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
          { config: "inductor", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
        ]}
    secrets: inherit

  linux-focal-cuda12_1-py3_12-gcc9-inductor-test:
    name: cuda12.1-py3.12-gcc9-sm86
@ -95,6 +94,7 @@ jobs:
      build-environment: linux-focal-cuda12.1-py3.12-gcc9-sm86
      docker-image: ${{ needs.linux-focal-cuda12_1-py3_12-gcc9-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_12-gcc9-inductor-build.outputs.test-matrix }}
    secrets: inherit

  linux-jammy-cpu-py3_12-inductor-halide-build:
    name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
@ -108,6 +108,7 @@ jobs:
        { include: [
          { config: "inductor-halide", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
        ]}
    secrets: inherit

  linux-jammy-cpu-py3_12-inductor-halide-test:
    name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
@ -117,6 +118,29 @@ jobs:
      build-environment: linux-jammy-py3.12-gcc11
      docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.test-matrix }}
    secrets: inherit

  linux-jammy-cpu-py3_12-inductor-triton-cpu-build:
    name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      build-environment: linux-jammy-py3.12-gcc11
      docker-image-name: pytorch-linux-jammy-py3.12-triton-cpu
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      test-matrix: |
        { include: [
          { config: "inductor-triton-cpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
        ]}

  linux-jammy-cpu-py3_12-inductor-triton-cpu-test:
    name: linux-jammy-cpu-py3.12-gcc11-inductor-triton-cpu
    uses: ./.github/workflows/_linux-test.yml
    needs: linux-jammy-cpu-py3_12-inductor-triton-cpu-build
    with:
      build-environment: linux-jammy-py3.12-gcc11
      docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-triton-cpu-build.outputs.test-matrix }}

  linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
    # Should be synced with the one in inductor-periodic.yml but this only runs inductor_timm
@ -134,8 +158,7 @@ jobs:
          { config: "inductor_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
          { config: "inductor_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.4xlarge.nvidia.gpu" },
        ]}
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    secrets: inherit

  linux-focal-cuda12_4-py3_10-gcc9-inductor-test:
    name: cuda12.4-py3.10-gcc9-sm86
@ -146,8 +169,7 @@ jobs:
      build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
      docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    secrets: inherit

  linux-jammy-cpu-py3_9-gcc11-inductor-build:
    name: linux-jammy-cpu-py3.9-gcc11-inductor
@ -201,8 +223,7 @@ jobs:
          { config: "cpu_inductor_freezing_avx2_timm", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
          { config: "cpu_inductor_freezing_avx2_timm", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.10xlarge.avx2" },
        ]}
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    secrets: inherit

  linux-jammy-cpu-py3_9-gcc11-inductor-test:
    name: linux-jammy-cpu-py3.9-gcc11-inductor
@ -212,5 +233,4 @@ jobs:
      build-environment: linux-jammy-py3.9-gcc11-build
      docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
    secrets: inherit

.github/workflows/lint-autoformat.yml (vendored; new file, 45 lines)
@ -0,0 +1,45 @@
name: Apply lint suggestions

on:

  pull_request:
    types: [opened, synchronize, reopened]

jobs:
  lintrunner-autoformat:
    permissions:
      contents: read
      pull-requests: write
    runs-on: lf.linux.2xlarge
    if: ${{ github.repository_owner == 'pytorch' }}
    steps:
      - name: Checkout pytorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          submodules: true
          fetch-depth: 0
      - name: Setup miniconda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: "3.10"
      - name: Run lintrunner (nonretryable)
        continue-on-error: true
        # we can't run on all files here: suggestions can only be made on changes that appear in the PR diff UI
        run: |
          export ADDITIONAL_LINTRUNNER_ARGS="format"
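          # NB: ADDITIONAL_LINTRUNNER_ARGS is presumably forwarded to the lintrunner
          # invocation inside the wrapper script below, selecting its "format" mode.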
          bash .github/scripts/lintrunner.sh
      - name: Check for changes
        id: git-check
        continue-on-error: true
        run: |
          git diff --exit-code || echo "changes=true" >> "$GITHUB_OUTPUT"
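          # NB: 'git diff --exit-code' exits non-zero when the autoformatter modified
          # files, so the '||' branch records changes=true as a step output that the
          # "Suggest changes" step reads via steps.git-check.outputs.changes.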
      - name: Suggest changes
        if: steps.git-check.outputs.changes == 'true'
        continue-on-error: true
        uses: parkerbxyz/suggest-changes@v1
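        # NB (assumed behavior): this action turns the uncommitted working-tree diff
        # into inline review suggestions on the pull request.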
        with:
          comment: "Please commit the suggested changes from pytorch's linter."

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true

.github/workflows/lint.yml (vendored; 9 lines changed)
@ -36,7 +36,7 @@ jobs:
      submodules: true
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        export ADDITIONAL_LINTRUNNER_ARGS="--take CLANGTIDY,CLANGFORMAT"
        export ADDITIONAL_LINTRUNNER_ARGS="--take CLANGTIDY,CLANGFORMAT --all-files"
        export CLANG=1
        .github/scripts/lintrunner.sh

@ -53,7 +53,7 @@ jobs:
      submodules: true
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        export ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT"
        export ADDITIONAL_LINTRUNNER_ARGS="--skip CLANGTIDY,CLANGFORMAT --all-files"
        .github/scripts/lintrunner.sh

  quick-checks:
@ -215,14 +215,15 @@ jobs:
        with:
          submodules: false
          fetch-depth: 1
      - name: Setup Python 3.8
      - name: Setup Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: '3.8'
          python-version: '3.9'
          architecture: x64
          cache: pip
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install pytest-rerunfailures==11.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.2.* fbscribelogger==0.1.* numpy==1.24.*
          pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
      - name: Run run_test.py (nonretryable)

.github/workflows/periodic.yml (vendored; 53 lines changed)
@ -57,10 +57,10 @@ jobs:
      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
      test-matrix: |
        { include: [
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
        ]}
  linux-focal-cuda12_1-py3_10-gcc9-test:
@ -89,10 +89,10 @@ jobs:
          { config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
        ]}

@ -118,9 +118,10 @@ jobs:
      docker-image-name: pytorch-linux-jammy-py3.9-gcc11
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
        ]}

  parallelnative-linux-jammy-py3_9-gcc11-test:
@ -218,7 +219,9 @@ jobs:
  # TODO: Figure out how to migrate this job to M1 runner
  ios-build-test:
    name: ios-build-test
    if: github.event_name != 'schedule' || github.event.schedule == '45 0,8,16 * * 1-5' || github.event.schedule == '45 4 * * 0,6' || github.event.schedule == '29 8 * * *'
    # Has been broken for a while, see https://github.com/pytorch/pytorch/issues/136284
    # if: github.event_name != 'schedule' || github.event.schedule == '45 0,8,16 * * 1-5' || github.event.schedule == '45 4 * * 0,6' || github.event.schedule == '29 8 * * *'
    if: false
    uses: ./.github/workflows/_ios-build-test.yml
    with:
      trigger-event: ${{ github.event_name }}
@ -297,13 +300,13 @@ jobs:
      docker-image: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-vulkan-focal-py3_11-clang10-build.outputs.test-matrix }}

  linux-focal-rocm6_1-py3_10-build:
    name: linux-focal-rocm6.1-py3.10
  linux-focal-rocm6_2-py3_10-build:
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-rocm6.1-py3.10
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image-name: pytorch-linux-focal-rocm-n-py3
      test-matrix: |
        { include: [
@ -312,19 +315,19 @@ jobs:
          { config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu" },
        ]}

  linux-focal-rocm6_1-py3_10-test:
  linux-focal-rocm6_2-py3_10-test:
    permissions:
      id-token: write
      contents: read
    name: linux-focal-rocm6.1-py3.10
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_rocm-test.yml
    needs:
      - linux-focal-rocm6_1-py3_10-build
      - linux-focal-rocm6_2-py3_10-build
      - target-determination
    with:
      build-environment: linux-focal-rocm6.1-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}

  linux-focal-cuda12_1-py3_10-gcc9-experimental-split-build:
    name: linux-focal-cuda12.1-py3.10-gcc9-experimental-split-build
@ -337,10 +340,10 @@ jobs:
      docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
      test-matrix: |
        { include: [
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
        ]}

.github/workflows/pull.yml (vendored; 38 lines changed)
@ -185,10 +185,10 @@ jobs:
      docker-image-name: pytorch-linux-focal-py3.9-clang10
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -217,10 +217,10 @@ jobs:
      docker-image-name: pytorch-linux-focal-py3.11-clang10
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -251,10 +251,10 @@ jobs:
      docker-image-name: pytorch-linux-focal-py3.12-clang10
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "dynamo", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@ -383,7 +383,7 @@ jobs:
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-py3.9-clang9-xla
      docker-image-name: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:v1.1-lite
      docker-image-name: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/xla_base:v1.3-lite
      test-matrix: |
        { include: [
          { config: "xla", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.12xlarge" },
@ -503,15 +503,15 @@ jobs:
        ]}
    secrets: inherit

  linux-focal-rocm6_1-py3_10-build:
  linux-focal-rocm6_2-py3_10-build:
    # don't run build twice on main
    if: github.event_name == 'pull_request'
    name: linux-focal-rocm6.1-py3.10
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-rocm6.1-py3.10
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image-name: pytorch-linux-focal-rocm-n-py3
      sync-tag: rocm-build
      test-matrix: |
@ -588,9 +588,9 @@ jobs:
      docker-image-name: pytorch-linux-focal-py3.12-clang10
      test-matrix: |
        { include: [
          { config: "default", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
          { config: "default", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
          { config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
          { config: "default", shard: 1, num_shards: 3, runner: "linux.4xlarge" },
          { config: "default", shard: 2, num_shards: 3, runner: "linux.4xlarge" },
          { config: "default", shard: 3, num_shards: 3, runner: "linux.4xlarge" },
          { config: "dynamo", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
          { config: "dynamo", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
          { config: "dynamo", shard: 3, num_shards: 3, runner: "linux.2xlarge" },

.github/workflows/rocm.yml (vendored; 18 lines changed)
@ -25,11 +25,11 @@ jobs:
      id-token: write
      contents: read

  linux-focal-rocm6_1-py3_10-build:
    name: linux-focal-rocm6.1-py3.10
  linux-focal-rocm6_2-py3_10-build:
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-focal-rocm6.1-py3.10
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image-name: pytorch-linux-focal-rocm-n-py3
      sync-tag: rocm-build
      test-matrix: |
@ -42,16 +42,16 @@ jobs:
          { config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.2" },
        ]}

  linux-focal-rocm6_1-py3_10-test:
  linux-focal-rocm6_2-py3_10-test:
    permissions:
      id-token: write
      contents: read
    name: linux-focal-rocm6.1-py3.10
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_rocm-test.yml
    needs:
      - linux-focal-rocm6_1-py3_10-build
      - linux-focal-rocm6_2-py3_10-build
      - target-determination
    with:
      build-environment: linux-focal-rocm6.1-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										24
									
								
								.github/workflows/s390.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								.github/workflows/s390.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
@ -0,0 +1,24 @@
name: s390

on:
  push:
    branches:
      - main
    tags:
      - ciflow/s390/*
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

permissions: read-all

jobs:
  linux-manylinux-2_28-py3-cpu-s390x-build:
    name: linux-manylinux-2_28-py3-cpu-s390x
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-s390x-binary-manywheel
      docker-image-name: pytorch/manylinuxs390x-builder:cpu-s390x-main
      runner: linux.s390x
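
The `ciflow/s390/*` trigger follows the usual ciflow tag convention. As a sketch (the PR number below is purely illustrative, and in practice the tag is normally pushed by the bot when a `ciflow/s390` label is applied), a maintainer could start a run by pushing a matching tag:

```
# Illustrative only: trigger the s390 workflow for a PR via a ciflow tag.
git tag ciflow/s390/12345
git push origin ciflow/s390/12345
```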

18 .github/workflows/slow.yml vendored
@ -130,13 +130,13 @@ jobs:
      docker-image: ${{ needs.linux-focal-py3_9-clang10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-py3_9-clang10-build.outputs.test-matrix }}

  linux-focal-rocm6_1-py3_10-build:
    name: linux-focal-rocm6.1-py3.10
  linux-focal-rocm6_2-py3_10-build:
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-rocm6.1-py3.10
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image-name: pytorch-linux-focal-rocm-n-py3
      test-matrix: |
        { include: [
@ -144,19 +144,19 @@ jobs:
          { config: "slow", shard: 2, num_shards: 2, runner: "linux.rocm.gpu" },
        ]}

  linux-focal-rocm6_1-py3_10-test:
  linux-focal-rocm6_2-py3_10-test:
    permissions:
      id-token: write
      contents: read
    name: linux-focal-rocm6.1-py3.10
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_rocm-test.yml
    needs:
      - linux-focal-rocm6_1-py3_10-build
      - linux-focal-rocm6_2-py3_10-build
      - target-determination
    with:
      build-environment: linux-focal-rocm6.1-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}

  linux-jammy-py3_10-clang15-asan-build:
    name: linux-jammy-py3.10-clang15-asan

26 .github/workflows/trunk.yml vendored
@ -223,13 +223,13 @@ jobs:
      cuda-version: "12.1"
      runner: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"

  linux-focal-rocm6_1-py3_10-build:
    name: linux-focal-rocm6.1-py3.10
  linux-focal-rocm6_2-py3_10-build:
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_linux-build.yml
    needs: get-label-type
    with:
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      build-environment: linux-focal-rocm6.1-py3.10
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image-name: pytorch-linux-focal-rocm-n-py3
      sync-tag: rocm-build
      test-matrix: |
@ -240,19 +240,19 @@ jobs:
        ]}
    secrets: inherit

  linux-focal-rocm6_1-py3_10-test:
  linux-focal-rocm6_2-py3_10-test:
    permissions:
      id-token: write
      contents: read
    name: linux-focal-rocm6.1-py3.10
    name: linux-focal-rocm6.2-py3.10
    uses: ./.github/workflows/_rocm-test.yml
    needs:
      - linux-focal-rocm6_1-py3_10-build
      - linux-focal-rocm6_2-py3_10-build
      - target-determination
    with:
      build-environment: linux-focal-rocm6.1-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_1-py3_10-build.outputs.test-matrix }}
      build-environment: linux-focal-rocm6.2-py3.10
      docker-image: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm6_2-py3_10-build.outputs.test-matrix }}
      tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor distributed/test_c10d_common distributed/test_c10d_nccl"
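
The `tests-to-include` list maps onto the repository's test runner; as a rough sketch (assuming a working source build, and that `test/run_test.py --include` selects suites the same way it does in CI), a subset can be reproduced locally:

```
# Sketch: run a targeted slice of the same suites the ROCm job runs.
python test/run_test.py --include test_nn test_torch test_autograd
```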

  linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build:
@ -266,10 +266,10 @@ jobs:
      docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
      test-matrix: |
        { include: [
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
          { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
          { config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },

3 .github/workflows/trymerge.yml vendored
@ -28,7 +28,7 @@ jobs:
          check-latest: false
          cache: pip
          architecture: x64
      - run: pip install pyyaml==6.0 rockset==1.0.3
      - run: pip install pyyaml==6.0

      - name: Setup committer id
        run: |
@ -43,7 +43,6 @@ jobs:
          COMMENT_ID: ${{ github.event.client_payload.comment_id }}
          REBASE: ${{ github.event.client_payload.rebase }}
          IGNORE_CURRENT: ${{ github.event.client_payload.ignore_current }}
          ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
          DRCI_BOT_KEY: ${{ secrets.DRCI_BOT_KEY }}
          GITHUB_RUN_ID: ${{ github.run_id }}
        run: |

24 .github/workflows/update-viablestrict.yml vendored
@ -11,15 +11,39 @@ concurrency:

jobs:
  do_update_viablestrict:
    permissions:
      id-token: write
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: ubuntu-20.04
    environment: ${{ (github.event_name == 'schedule') && 'mergebot' || '' }}
    steps:
      - name: Update viable/strict
        uses: pytorch/test-infra/.github/actions/update-viablestrict@main
        id: update_viablestrict
        with:
          repository: pytorch/pytorch
          stable-branch: viable/strict
          requires: '[\"pull\", \"trunk\", \"lint\", \"linux-binary\"]'
          secret-bot-token: ${{ secrets.MERGEBOT_TOKEN }}
          rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}

      - name: Authenticate to AWS with OIDC
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308535385114:role/upload_to_ossci_raw_job_status
          aws-region: us-east-1

      - name: Print sha
        env:
          LATEST_SHA: ${{ steps.update_viablestrict.outputs.latest_viable_sha }}
          PUSH_RESULT: ${{ steps.update_viablestrict.outputs.push_result }}
          TIME: ${{ steps.update_viablestrict.outputs.time }}
        run: |
          echo "${PUSH_RESULT}"
          if [ "$PUSH_RESULT" = "Everything up-to-date" ]; then
            echo "No update pushed"
          else
            echo "{\"sha\": \"${LATEST_SHA}\", \"repository\":\"pytorch/pytorch\", \"timestamp\": ${TIME}}" > "/tmp/${LATEST_SHA}.json"
            pip install awscli==1.29.40
            aws s3 cp "/tmp/${LATEST_SHA}.json" "s3://ossci-raw-job-status/stable_pushes/pytorch/pytorch/${LATEST_SHA}.json"
          fi

55 .github/workflows/upload-alerts.yml vendored
@ -1,55 +0,0 @@
# upload alerts every 10 minutes

name: Upload Alerts to AWS/Rockset

on:
  schedule:
    - cron: '*/10 * * * *'
  pull_request:
    paths:
      - 'tools/alerts/create_alerts.py'
      - '.github/workflows/upload-alerts.yml'

jobs:
  upload-alerts:
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: ubuntu-22.04
    environment: upload-stats
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'
          cache: pip

      - name: Install Python Packages
        run: |
          pip3 install rockset==1.0.3 boto3==1.19.12 requests==2.32.2

      - name: Create alerts
        run: |
          output=$(PYTHONPATH=$PYTHONPATH:$(pwd) python3 "tools/alerts/create_alerts.py")
          echo "uploading following alerts"
          echo "$output"
          echo "script-output=$output" >> "$GITHUB_OUTPUT"
        id: alert_creation_step

      - name: Upload alerts
        env:
          ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        uses: pytorch/test-infra/.github/actions/upload-alerts@main
        with:
          alerts: '${{ steps.alert_creation_step.outputs.script-output }}'
          organization: "pytorch"
          repo: "pytorch"

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
  cancel-in-progress: true
@ -153,7 +153,7 @@ init_command = [
    'junitparser==2.1.1',
    'rich==10.9.0',
    'pyyaml==6.0.1',
    'optree==0.12.1',
    'optree==0.13.0',
]

[[linter]]
@ -195,6 +195,7 @@ include_patterns = [
    # and excluding most sub-directories for now.
    'aten/src/ATen/*.h',
    'aten/src/ATen/*.cpp',
    'aten/src/ATen/cuda/*.cpp',
    'aten/src/ATen/cpu/*.h',
    'aten/src/ATen/cpu/*.cpp',
    'aten/src/ATen/core/*.h',
@ -215,6 +216,10 @@ include_patterns = [
    'torch/csrc/*.cpp',
    'torch/csrc/**/*.h',
    'torch/csrc/**/*.cpp',
    'torch/csrc/distributed/autograd/**/*.cpp',
    'torch/csrc/distributed/autograd/**/*.h',
    'torch/csrc/distributed/rpc/**/*.cpp',
    'torch/csrc/distributed/rpc/**/*.h',
    'torch/csrc/jit/serialization/*.h',
    'torch/csrc/jit/serialization/*.cpp',
]
@ -224,7 +229,6 @@ exclude_patterns = [
    # CUDA files are also excluded.
    '**/fb/**',
    '**/*pb.h',
    'aten/**/cuda/*pp',
    'c10/xpu/**/*.h',
    'c10/xpu/**/*.cpp',
    'c10/cuda/CUDAAlgorithm.h',
@ -246,7 +250,6 @@ exclude_patterns = [
    'torch/csrc/inductor/aoti_torch/c/shim.h',
    'torch/csrc/jit/**/*',
    'torch/csrc/jit/serialization/mobile_bytecode_generated.h',
    'torch/csrc/lazy/**/*',
]
init_command = [
    'python3',
@ -1255,7 +1258,6 @@ exclude_patterns = [
    'torch/fx/experimental/refinement_types.py',
    'torch/fx/experimental/rewriter.py',
    'torch/fx/experimental/schema_type_annotation.py',
    'torch/fx/experimental/symbolic_shapes.py',
    'torch/fx/experimental/unification/__init__.py',
    'torch/fx/experimental/unification/core.py',
    'torch/fx/experimental/unification/dispatch.py',
@ -1271,7 +1273,6 @@ exclude_patterns = [
    'torch/fx/experimental/unification/utils.py',
    'torch/fx/experimental/unification/variable.py',
    'torch/fx/experimental/unify_refinements.py',
    'torch/fx/experimental/validator.py',
    'torch/fx/graph.py',
    'torch/fx/graph_module.py',
    'torch/fx/interpreter.py',
@ -1585,6 +1586,27 @@ command = [
]
is_formatter = true


[[linter]]
code = 'META_NO_CREATE_UNBACKED'
include_patterns = [
  "torch/_meta_registrations.py"
]
command = [
    'python3',
    'tools/linter/adapters/grep_linter.py',
    '--pattern=create_unbacked',
    '--linter-name=META_NO_CREATE_UNBACKED',
    '--error-name=no create_unbacked in meta registrations',
    """--error-description=\
        Data-dependent operators should have their meta \
        registration in torch/_subclasses/fake_impls.py, \
        not torch/_meta_registrations.py
    """,
    '--',
    '@{{PATHSFILE}}'
]
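
Assuming the standard `lintrunner` workflow (its `--take` flag is how individual checks are usually selected), the new check could be exercised locally along these lines:

```
# Sketch: run only the new check against the file it guards.
lintrunner --take META_NO_CREATE_UNBACKED torch/_meta_registrations.py
```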

[[linter]]
code = 'ATEN_CPU_GPU_AGNOSTIC'
include_patterns = [

@ -305,7 +305,6 @@ if(NOT DEFINED USE_VULKAN)
  cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF)
endif()

option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF)
option(USE_SOURCE_DEBUG_ON_MOBILE "Enable" ON)
option(USE_LITE_INTERPRETER_PROFILER "Enable" ON)
cmake_dependent_option(
@ -369,7 +368,7 @@ cmake_dependent_option(
    USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI" OFF)
cmake_dependent_option(
    USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
    "USE_DISTRIBUTED" OFF)
    "USE_DISTRIBUTED AND NOT WIN32" OFF)
option(ONNX_ML "Enable traditional ONNX ML API." ON)
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
option(BUILD_LIBTORCH_CPU_WITH_DEBUG
@ -912,11 +911,6 @@ if(USE_PYTORCH_QNNPACK)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_PYTORCH_QNNPACK")
endif()

if(USE_SLEEF_FOR_ARM_VEC256)
  string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
  add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()

# Enable sleef on macOS with Apple silicon by default
if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64"))
  message(STATUS "Running on macOS with Apple silicon")
@ -924,6 +918,14 @@ if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") AND ("${CMAKE_SYSTEM_PROCESSOR}" STR
  add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()

# Enable sleef on Arm(R) architecture by default (except Android)
if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
  AND("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "aarch64"))
  string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
  add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
endif()


if(USE_XNNPACK)
  string(APPEND CMAKE_CXX_FLAGS " -DUSE_XNNPACK")
endif()
@ -1081,8 +1083,16 @@ if(NOT MSVC)
  append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
  string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU")
    if(CMAKE_BUILD_TYPE MATCHES Debug)
      message(WARNING "Applying -Og optimization for aarch64 GCC debug build to work around ICE")
    endif()
    string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
    string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -Og")
  else()
    string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
    string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
  endif()
  append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
  append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)

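For a sense of when the new `-Og` branch applies: a Debug build on an aarch64 host with GCC would hit it. As a sketch, assuming a source checkout and PyTorch's usual debug switch:

```
# Sketch: DEBUG=1 selects a Debug CMake build; on aarch64 with GCC the
# new branch swaps -O0 for -Og to work around the compiler ICE.
DEBUG=1 python setup.py develop
```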
@ -98,6 +98,10 @@ test/test_type_promotion.py @mruberry
test/functorch/test_ops.py @zou3519 @chillee @kshitij12345
test/functorch/test_vmap.py @zou3519 @chillee @kshitij12345

# HOPs
torch/_higher_order_ops/*.py @zou3519
torch/_dynamo/variables/higher_order_ops.py @zou3519

# torch MPS
test/test_mps.py @kulinseth @malfet
aten/src/ATen/mps/ @kulinseth @malfet
@ -117,7 +121,7 @@ torch/profiler/ @aaronenyeshi @sraikund16
test/functorch/test_aotdispatch.py @ezyang @Chillee

# Dataloader
torch/utils/data/ @andrewkho @gokulavasan
torch/utils/data/ @andrewkho @divyanshk

# hipify
torch/utils/hipify/ @jeffdaily @jithunnair-amd

@ -208,6 +208,8 @@ If you want to compile with ROCm support, install
- [AMD ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html) 4.0 and above installation
- ROCm is currently supported only for Linux systems.

By default the build system expects ROCm to be installed in `/opt/rocm`. If ROCm is installed in a different directory, the `ROCM_PATH` environment variable must be set to the ROCm installation directory. The build system automatically detects the AMD GPU architecture. Optionally, the AMD GPU architecture can be explicitly set with the `PYTORCH_ROCM_ARCH` environment variable ([supported AMD GPU architectures](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html#supported-gpus)).

If you want to disable ROCm support, export the environment variable `USE_ROCM=0`.
Other potentially useful environment variables may be found in `setup.py`.
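
A minimal sketch of the variables this section describes; the install prefix and GPU architecture below are illustrative assumptions, not defaults:

```
# Sketch: build from source against a non-default ROCm prefix for an
# explicit GPU architecture.
export ROCM_PATH=/opt/rocm-6.2.0    # assumed install location
export PYTORCH_ROCM_ARCH=gfx90a     # assumed target; autodetected if unset
python setup.py develop
```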

28 RELEASE.md
@ -48,16 +48,16 @@

Following is the Release Compatibility Matrix for PyTorch releases:

| PyTorch version | Python | Stable CUDA | Experimental CUDA | Stable ROCm |
| --- | --- | --- | --- | --- |
| 2.5 | >=3.9, <=3.12, (3.13 experimental) | CUDA 11.8, CUDA 12.1, CUDA 12.4, CUDNN 9.1.0.70  | None | ROCm 6.2 |
| 2.4 | >=3.8, <=3.12 | CUDA 11.8, CUDA 12.1, CUDNN 9.1.0.70  | CUDA 12.4, CUDNN 9.1.0.70 | ROCm 6.1 |
| 2.3 | >=3.8, <=3.11, (3.12 experimental) | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 6.0 |
| 2.2 | >=3.8, <=3.11, (3.12 experimental) | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.7 |
| 2.1 | >=3.8, <=3.11 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.6 |
| 2.0 | >=3.8, <=3.11 | CUDA 11.7, CUDNN 8.5.0.96 | CUDA 11.8, CUDNN 8.7.0.84 | ROCm 5.4 |
| 1.13 | >=3.7, <=3.10 | CUDA 11.6, CUDNN 8.3.2.44 | CUDA 11.7, CUDNN 8.5.0.96 | ROCm 5.2 |
| 1.12 | >=3.7, <=3.10 | CUDA 11.3, CUDNN 8.3.2.44 | CUDA 11.6, CUDNN 8.3.2.44 | ROCm 5.0 |
| PyTorch version | Python | C++ | Stable CUDA | Experimental CUDA | Stable ROCm |
| --- | --- | --- | --- | --- | --- |
| 2.5 | >=3.9, <=3.12, (3.13 experimental) | C++17 | CUDA 11.8, CUDA 12.1, CUDA 12.4, CUDNN 9.1.0.70  | None | ROCm 6.2 |
| 2.4 | >=3.8, <=3.12 | C++17 | CUDA 11.8, CUDA 12.1, CUDNN 9.1.0.70  | CUDA 12.4, CUDNN 9.1.0.70 | ROCm 6.1 |
| 2.3 | >=3.8, <=3.11, (3.12 experimental) | C++17 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 6.0 |
| 2.2 | >=3.8, <=3.11, (3.12 experimental) | C++17 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.7 |
| 2.1 | >=3.8, <=3.11 | C++17 | CUDA 11.8, CUDNN 8.7.0.84 | CUDA 12.1, CUDNN 8.9.2.26 | ROCm 5.6 |
| 2.0 | >=3.8, <=3.11 | C++14 | CUDA 11.7, CUDNN 8.5.0.96 | CUDA 11.8, CUDNN 8.7.0.84 | ROCm 5.4 |
| 1.13 | >=3.7, <=3.10 | C++14 | CUDA 11.6, CUDNN 8.3.2.44 | CUDA 11.7, CUDNN 8.5.0.96 | ROCm 5.2 |
| 1.12 | >=3.7, <=3.10 | C++14 | CUDA 11.3, CUDNN 8.3.2.44 | CUDA 11.6, CUDNN 8.3.2.44 | ROCm 5.0 |

## Release Cadence

@ -234,7 +234,7 @@ Typically, within a release cycle fixes are necessary for regressions, test fixe
For fixes that are to go into a release after the release branch has been cut we typically employ the use of a cherry pick tracker.

An example of this would look like:
* https://github.com/pytorch/pytorch/issues/51886
* https://github.com/pytorch/pytorch/issues/128436

Please also make sure to add a milestone target to the PR/issue, especially if it needs to be considered for inclusion into the dot release.

@ -243,7 +243,9 @@ Please also make sure to add milestone target to the PR/issue, especially if it
#### How to do Cherry Picking

You can now use `pytorchbot` to cherry pick a PyTorch PR that has been committed
to the main branch using `@pytorchbot cherry-pick` command as follows.
to the main branch using `@pytorchbot cherry-pick` command as follows (make sure
that the cherry-pick tracker issue for the target release is labelled as "release tracker" -
this will allow the bot to find it and post comments).

```
usage: @pytorchbot cherry-pick --onto ONTO [--fixes FIXES] -c
@ -380,7 +382,7 @@ Patch release process takes around 4-5 weeks to complete.
### Issue Tracker for Patch releases

For patch releases, an issue tracker needs to be created. For a patch release, we require all cherry-pick changes to have links to either a high-priority GitHub issue or a CI failure from the previous RC. An example of this would look like:
* https://github.com/pytorch/pytorch/issues/51886
* https://github.com/pytorch/pytorch/issues/128436

Only the following issues are accepted:
1. Fixes to regressions against previous major version (e.g. regressions introduced in 1.13.0 from 1.12.0 are pickable for 1.13.1)

@ -467,6 +467,9 @@ if(NOT EMSCRIPTEN AND NOT INTERN_BUILD_MOBILE)
endif()

if(USE_CUDA AND NOT USE_ROCM)
  add_definitions(-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1)
  add_definitions(-DCUTLASS_ENABLE_SM90_EXTENDED_MMA_SHAPES=1)
  add_definitions(-DCUTE_SM90_EXTENDED_MMA_SHAPES_ENABLED)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/include)
  list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/cutlass/tools/util/include)
  if($ENV{ATEN_STATIC_CUDA})

@ -145,6 +145,14 @@ void Context::setSDPUseMath(bool e) {
  enabled_mathSDP = e;
}

bool Context::allowFP16BF16ReductionMathSDP() const {
  return allow_fp16_bf16_reduction_mathSDP;
}

void Context::setAllowFP16BF16ReductionMathSDP(bool e) {
  allow_fp16_bf16_reduction_mathSDP = e;
}

bool Context::userEnabledCuDNNSDP() const {
  return enabled_cudnnSDP;
}

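If these accessors are surfaced to Python the way the other SDP toggles are, usage might look like the following; the binding name is an assumption, since this diff only shows the C++ `Context` methods:

```
# Assumed Python binding for the new C++ Context accessors shown above.
python -c "import torch; torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)"
```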
@ -39,25 +39,16 @@ class TORCH_API Context {

  const Generator& defaultGenerator(Device device) {
    c10::DeviceType device_type = device.type();
    initCUDAIfNeeded(device_type);
    initHIPIfNeeded(device_type);
    lazyInitDevice(device_type);

    if (device_type == at::kCPU) {
      return at::detail::getDefaultCPUGenerator();
    } else if (device_type == at::kCUDA) {
      return at::detail::getCUDAHooks().getDefaultCUDAGenerator(device.index());
    } else if (device_type == at::kMPS) {
      return at::detail::getMPSHooks().getDefaultMPSGenerator();
    } else if (device_type == at::kXPU) {
      return at::detail::getXPUHooks().getDefaultXPUGenerator(device.index());
    } else if (device_type == at::kIPU) {
      return at::detail::getIPUHooks().getDefaultIPUGenerator(device.index());
    } else if (device_type == at::kPrivateUse1) {
      return at::detail::getPrivateUse1Hooks().getDefaultGenerator(
          device.index());
    } else {
      AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
      return getAcceleratorHooksInterface(device_type)
          .getDefaultGenerator(device.index());
    }
  }

  const AcceleratorHooksInterface& getAcceleratorHooksInterface(
      std::optional<c10::DeviceType> opt_device_type = std::nullopt) {
    c10::DeviceType device_type = opt_device_type.has_value()
@ -80,10 +71,10 @@ class TORCH_API Context {
          c10::DeviceTypeName(device_type), " device type not an accelerator.");
    }
  }

  Device getDeviceFromPtr(void* data, c10::DeviceType device_type) {
    initCUDAIfNeeded(device_type);
    initHIPIfNeeded(device_type);
    initXPUIfNeeded(device_type);
    lazyInitDevice(device_type);

    if (device_type == at::kCPU) {
      return c10::DeviceType::CPU;
    } else if (device_type == at::kCUDA) {
@ -96,6 +87,7 @@ class TORCH_API Context {
      AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
    }
  }

  bool isPinnedPtr(
      const void* data,
      std::optional<c10::DeviceType> device_type = std::nullopt) {
@ -106,13 +98,22 @@ class TORCH_API Context {
            opt_device_type.value())) { // passed device not an accelerator
      return false;
    }
    return getAcceleratorHooksInterface(opt_device_type.value())
        .isPinnedPtr(data);
    return getAcceleratorHooksInterface(opt_device_type).isPinnedPtr(data);
  }

  Allocator* getPinnedMemoryAllocator(
      std::optional<c10::DeviceType> device_type = std::nullopt) {
    return getAcceleratorHooksInterface(device_type).getPinnedMemoryAllocator();
  }

  void lazyInitDevice(c10::DeviceType device_type) {
    if (device_type != at::kCPU) {
      c10::call_once(init_[static_cast<int8_t>(device_type)], [&] {
        getAcceleratorHooksInterface(device_type).init();
      });
    }
  }

  static bool hasOpenMP();
  static bool hasMKL();
  static bool hasLAPACK();
@ -165,27 +166,6 @@ class TORCH_API Context {
  static bool hasMAIA() {
    return c10::impl::hasDeviceGuardImpl(c10::DeviceType::MAIA);
  }
  // defined in header so that getNonVariableType has ability to inline
  // call_once check. getNonVariableType is called fairly frequently
  void lazyInitCUDA() {
    c10::call_once(thc_init, [&] { detail::getCUDAHooks().initCUDA(); });
  }
  void lazyInitHIP() {
    c10::call_once(thh_init, [&] { detail::getHIPHooks().initHIP(); });
  }
  void lazyInitXPU() {
    c10::call_once(thx_init, [&] { detail::getXPUHooks().initXPU(); });
  }
  void lazyInitMTIA() {
    c10::call_once(th_mtia_init, [&] { detail::getMTIAHooks().initMTIA(); });
  }
  void lazyInitPrivateUse1() {
    c10::call_once(thp_init, [&] {
      if (isPrivateUse1HooksRegistered()) {
        at::detail::getPrivateUse1Hooks().initPrivateUse1();
      }
    });
  }
  static const at::cuda::NVRTC& getNVRTC() {
    return detail::getCUDAHooks().nvrtc();
  }
@ -234,6 +214,9 @@ class TORCH_API Context {
  void setSDPUseCuDNN(bool);
  bool userEnabledCuDNNSDP() const;

  void setAllowFP16BF16ReductionMathSDP(bool);
  bool allowFP16BF16ReductionMathSDP() const;

  void setSDPUseOverrideable(bool);
  bool userEnabledOverrideableSDP() const;

@ -358,27 +341,8 @@ class TORCH_API Context {
  void setAllowFP16ReductionCPU(bool);

 private:
  void initCUDAIfNeeded(c10::DeviceType p) {
    if (p == c10::DeviceType::CUDA) {
      lazyInitCUDA();
    }
  }
  void initHIPIfNeeded(c10::DeviceType p) {
    if (p == c10::DeviceType::HIP) {
      lazyInitHIP();
    }
  }
  void initXPUIfNeeded(c10::DeviceType p) {
    if (p == c10::DeviceType::XPU) {
      lazyInitXPU();
    }
  }
  static bool checkCuBLASConfigDeterministic();
  c10::once_flag thc_init;
  c10::once_flag thh_init;
  c10::once_flag thx_init;
  c10::once_flag th_mtia_init;
  c10::once_flag thp_init;
  std::array<c10::once_flag, at::COMPILE_TIME_MAX_DEVICE_TYPES> init_;
  bool enabled_cudnn = true;
  bool deterministic_cudnn = false;
  bool deterministic_mkldnn = false;
@ -390,6 +354,7 @@ class TORCH_API Context {
  bool enabled_mathSDP = true;
  bool enabled_cudnnSDP = true;
  bool enabled_overrideable = true;
  bool allow_fp16_bf16_reduction_mathSDP = false;
#ifdef USE_ROCM
  bool benchmark_cudnn = true;
#else
@ -509,7 +474,7 @@ inline size_t getNumGPUs() {
        "to be CUDA (e.g., when you say CUDA, on a HIP build of ATen, this actually "
        "means HIP.  Rebuild PyTorch with one or the other disabled.");
  } else if (hasCUDA()) {
    return detail::getCUDAHooks().getNumGPUs();
    return detail::getCUDAHooks().deviceCount();
  } else if (hasHIP()) {
    return detail::getHIPHooks().getNumGPUs();
  } else {
@ -546,7 +511,7 @@ inline void manual_seed(uint64_t seed) {
  }
  // NB: Sometimes we build with CUDA, but we don't have any GPUs
  // available. In that case, we must not seed CUDA; it will fail!
  const auto cuda_num_gpus = detail::getCUDAHooks().getNumGPUs();
  const auto cuda_num_gpus = detail::getCUDAHooks().deviceCount();
  if (hasCUDA() && cuda_num_gpus > 0) {
    for (const auto i : c10::irange(cuda_num_gpus)) {
      auto cuda_gen = globalContext().defaultGenerator(
@ -559,7 +524,7 @@ inline void manual_seed(uint64_t seed) {
    }
  }

  const auto xpu_num_gpus = detail::getXPUHooks().getNumGPUs();
  const auto xpu_num_gpus = detail::getXPUHooks().deviceCount();
  if (hasXPU() && xpu_num_gpus) {
    for (const auto i : c10::irange(xpu_num_gpus)) {
      auto xpu_gen = globalContext().defaultGenerator(

@ -18,6 +18,8 @@ c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
    // To properly support this, see https://github.com/pytorch/pytorch/issues/14560
    if (at::globalContext().hasCUDA()) {
      return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
    } else if (at::globalContext().hasMTIA()) {
      return at::detail::getMTIAHooks().getPinnedMemoryAllocator();
    } else if (at::globalContext().hasXPU()) {
      return at::detail::getXPUHooks().getPinnedMemoryAllocator();
    } else if (at::isPrivateUse1HooksRegistered()) {

@ -420,15 +420,15 @@ inline c10::MaybeOwned<Tensor> expand_size(
inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
  // expands a list of Tensors; ignores undefined (null) tensors
  bool first = true;
  DimVector sizes;
  SymDimVector sizes;
  for (const auto i : c10::irange(to_expand.size())) {
    if (!to_expand[i].defined()) {
      continue;
    } else if (first) {
      sizes = to_expand[i].sizes();
      sizes = to_expand[i].sym_sizes();
      first = false;
    } else {
      sizes = infer_size_dimvector(sizes, to_expand[i].sizes());
      sizes = infer_size_symdimvector(sizes, to_expand[i].sym_sizes());
    }
  }

@ -436,10 +436,10 @@ inline std::vector<Tensor> expand_outplace(TensorList to_expand) {
  for (const auto i : c10::irange(to_expand.size())) {
    if (!to_expand[i].defined()) {
      continue;
    } else if (to_expand[i].sizes().equals(sizes)) {
    } else if (to_expand[i].sym_sizes().equals(sizes)) {
      result[i] = to_expand[i];
    } else {
      result[i] = to_expand[i].expand(sizes);
      result[i] = to_expand[i].expand_symint(sizes);
    }
  }
  return result;

@ -61,9 +61,8 @@ void set_num_threads(int nthreads) {
#endif
#ifdef USE_PTHREADPOOL
  // because PyTorch uses caffe2::pthreadpool() in QNNPACK
  caffe2::PThreadPool* const pool = caffe2::pthreadpool();
  caffe2::PThreadPool* const pool = caffe2::pthreadpool(nthreads);
  TORCH_INTERNAL_ASSERT(pool, "Invalid thread pool!");
  pool->set_thread_count(nthreads);
#endif
#if AT_MKLDNN_ENABLED()
  at::native::mkldnn::clear_computation_cache();

@ -19,7 +19,7 @@ Tensor& scalar_fill(Tensor& self, const Scalar& value) {
  AT_DISPATCH_V2(
      self.scalar_type(), "fill_out", AT_WRAP([&]() {
        fill_inplace<scalar_t>(self, value);
      }), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
      }), kComplexHalf, kHalf, kBool, kBFloat16, AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
  return self;
}
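
The added `AT_EXPAND(AT_FLOAT8_TYPES)` means scalar fills now dispatch for float8 tensors; a quick check, assuming a build that includes this change, might be:

```
# Sketch: fill_ on a float8 tensor, which the widened dispatch now covers.
python -c "import torch; t = torch.empty(4, dtype=torch.float8_e4m3fn); print(t.fill_(1.0))"
```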

@ -144,8 +144,8 @@ class CheckSparseTensorInvariants {
  bool old_state;

 public:
  CheckSparseTensorInvariants(bool state) {
    old_state = at::globalContext().checkSparseTensorInvariants();
  CheckSparseTensorInvariants(bool state)
      : old_state(at::globalContext().checkSparseTensorInvariants()) {
    at::globalContext().setCheckSparseTensorInvariants(state);
  }

@ -255,7 +255,9 @@ inline Tensor applySelect(
    // the other hand, indexing wrapping is valid for all negative int64_t
    // values, as x[INT64_MIN] is the same as x[INT64_MAX]
    TORCH_CHECK_INDEX(
        size > -1 - index && size > index,
        size.sym_gt(-1 - index)
            .sym_and(size.sym_gt(index))
            .expect_true(__FILE__, __LINE__),
        "index ",
        index,
        " is out of bounds for dimension ",

@ -82,7 +82,7 @@ class TORCH_API ThreadLocalState {
    !defined(BUILD_LITE_INTERPRETER)
  // TLS for autocast dtypes
  std::array<at::ScalarType, at::COMPILE_TIME_MAX_DEVICE_TYPES>
      autocast_dtypes_;
      autocast_dtypes_{};
#endif

  friend class ThreadLocalStateGuard;

@ -111,17 +111,6 @@ template <
    typename E,
    typename B = HostBlock<S>>
struct CachingHostAllocatorImpl {
  CachingHostAllocatorImpl() {
    // Launch the background thread and process events in a loop.
    if (pinned_use_background_threads()) {
      getBackgroundThreadPool()->run([&]() {
        while (true) {
          process_events();
          std::this_thread::sleep_for(std::chrono::microseconds(100));
        }
      });
    }
  }
  virtual ~CachingHostAllocatorImpl() = default;

 public:
@ -155,6 +144,17 @@ struct CachingHostAllocatorImpl {
      if (block) {
        return {block->ptr_, reinterpret_cast<void*>(block)};
      }

      // Launch the background thread and process events in a loop.
      static c10::once_flag background_thread_flag;
      c10::call_once(background_thread_flag, [this] {
        getBackgroundThreadPool()->run([&]() {
          while (true) {
            process_events();
            std::this_thread::sleep_for(std::chrono::microseconds(100));
          }
        });
      });
    }

    // Slow path: if we can't allocate from the cached free list, we need

@ -13,8 +13,6 @@

#include <ATen/core/Array.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/Half.h>
#include <cmath>
#include <cstdint>

@ -45,7 +45,7 @@ private:
  c10::impl::LocalDispatchKeySet saved_;
};

void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
void pythonFallback(const c10::OperatorHandle& op, c10::DispatchKeySet dispatch_keys, torch::jit::Stack* stack) {
  TORCH_INTERNAL_ASSERT(tls_on_entry.has_value());
  // c10::impl::ForceDispatchKeyGuard dispatcher_guard(tls_on_entry.value());
  // StashTLSOnEntryGuard stash_guard;
@ -68,12 +68,20 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
  // we actually run dispatch(), we will take out PyObjects in the context
  // of that interpreter, and this will ensure that everyone is on the same
  // interpreter.
  bool tensors_with_python_key_present = false;
  c10::impl::PyInterpreter* interpreter = nullptr;
  for (const auto& ivalue : torch::jit::last(*stack, num_arguments)) {
    if (ivalue.isTensor()) {
      auto* interpreter = ivalue.unsafeToTensorImpl()->pyobj_slot()->pyobj_interpreter();
      if (interpreter) {
        (*interpreter)->dispatch(op, stack);
        return;
      auto* t = ivalue.unsafeToTensorImpl();
      if (t->key_set().has(c10::DispatchKey::Python)) {
        tensors_with_python_key_present = true;
      }

      if (!interpreter) {
        auto* t_interpreter = t->pyobj_slot()->pyobj_interpreter();
        if (t_interpreter) {
          interpreter = t_interpreter;
        }
      }
    } else if (ivalue.isTensorList() || ivalue.isOptionalTensorList()) {
      // NB: use toListRef as it doesn't induce refcount bumps (toTensorListRef
@ -82,14 +90,43 @@ void pythonFallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
        if (nv.isNone()) {
          continue;
        }
        auto* interpreter = nv.unsafeToTensorImpl()->pyobj_slot()->pyobj_interpreter();
        if (interpreter) {
          (*interpreter)->dispatch(op, stack);
          return;

        auto* t = nv.unsafeToTensorImpl();
        if (t->key_set().has(c10::DispatchKey::Python)) {
          tensors_with_python_key_present = true;
        }

        if (!interpreter) {
          auto* t_interpreter = t->pyobj_slot()->pyobj_interpreter();
          if (t_interpreter) {
            interpreter = t_interpreter;
          }
        }
      }
    }
  }

  if (interpreter) {
    if (tensors_with_python_key_present) {
      (*interpreter)->dispatch(op, stack);
    } else {
      // At this point, there are no modes in the stack and no tensors with the python key,
      // so disable the python key before redispatching.
      // See https://github.com/pytorch/pytorch/issues/136565
      c10::DispatchKeySet keyset = dispatch_keys.remove(c10::DispatchKey::Python);

      // Remove Python key from the included set as well (modes add it there).
      c10::impl::LocalDispatchKeySet local_keyset = c10::impl::tls_local_dispatch_key_set();
      c10::impl::ForceDispatchKeyGuard no_python_guard(
        local_keyset.included_.remove(c10::DispatchKey::Python),
        local_keyset.excluded_
      );

      op.redispatchBoxed(keyset, stack);
    }
    return;
  }

  TORCH_INTERNAL_ASSERT(0, "Hit Python dispatch key but no arguments had PyInterpreter (no tensor args?)");
}

@ -17,8 +17,22 @@ TORCH_SDT_DEFINE_SEMAPHORE(operator_end)
#endif

bool show_dispatch_trace() {
    static char const* temp = getenv("TORCH_SHOW_DISPATCH_TRACE");
    return temp != nullptr;
  static auto envar = std::getenv("TORCH_SHOW_DISPATCH_TRACE");

  if (envar) {
    if (strcmp(envar, "0") == 0) {
      return false;
    }
    if (strcmp(envar, "1") == 0) {
      return true;
    }
    TORCH_WARN(
        "ignoring invalid value for TORCH_SHOW_DISPATCH_TRACE: ",
        envar,
        " valid values are 0 or 1.");
  }

  return false;
}

static thread_local int64_t dispatch_trace_nesting_value_;

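With this change the variable is parsed as a strict 0/1 value rather than mere presence; a quick way to exercise both paths (illustrative commands, and tracing output may require a build with dispatch tracing compiled in):

```
# "1" enables tracing, "0" disables it, anything else warns and disables.
TORCH_SHOW_DISPATCH_TRACE=1 python -c "import torch; torch.ones(1) + 1"
TORCH_SHOW_DISPATCH_TRACE=0 python -c "import torch; torch.ones(1) + 1"
```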
@ -261,7 +261,7 @@ public:
  Vectorized<double> nextafter(const Vectorized<double> &b) const {
    USE_SLEEF(
      {
        return Vectorized<double>(Sleef_nextafterfx_sve(values, b));
        return Vectorized<double>(Sleef_nextafterdx_sve(values, b));
      },
      {
        __at_align__ double tmp[size()];

@@ -208,8 +208,27 @@ struct VecConvert<
            (is_reduced_floating_point_v<src_t> && is_8bit_integer_v<dst_t>),
        void>> {
  static inline VectorizedN<dst_t, 1> apply(const VectorizedN<src_t, 1>& src) {
    VectorizedN<float, 1> tmp_fp32 = VecConvert<float, 1, src_t, 1>::apply(src);
    return VecConvert<dst_t, 1, float, 1>::apply(tmp_fp32);
    VectorizedN<float, 2> tmp_fp32 = VecConvert<float, 2, src_t, 1>::apply(src);
    return VecConvert<dst_t, 1, float, 2>::apply(tmp_fp32);
  }
};

template <typename dst_t>
struct VecConvert<
    dst_t,
    1,
    float,
    2,
    typename std::enable_if_t<is_8bit_integer_v<dst_t>,
        void>> {
  static inline VectorizedN<dst_t, 1> apply(const VectorizedN<float, 2>& src) {
    at::vec::Vectorized<dst_t> vec1 = convert_float_to_int8<dst_t>(src[0]);
    at::vec::Vectorized<dst_t> vec2 = convert_float_to_int8<dst_t>(src[1]);
    __m128 lane2 = _mm256_castps256_ps128(_mm256_castsi256_ps(vec2));
    __m256 combined = _mm256_insertf128_ps(_mm256_castsi256_ps(vec1), lane2, 1);
    // Shuffle bits [191:128] of combined into bits [127:64] of result
    __m256i result = _mm256_permute4x64_epi64(_mm256_castps_si256(combined), 0b11011000);
    return at::vec::Vectorized<dst_t>(result);
  }
};
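// ---------------------------------------------------------------------------
// Editor's sketch (not part of the diff): the 0b11011000 control mask used
// above reorders the four 64-bit lanes [0,1,2,3] -> [0,2,1,3], pulling lane 2
// (bits [191:128]) down into lane 1 (bits [127:64]). A standalone check,
// compiled with -mavx2:
// ---------------------------------------------------------------------------
#include <immintrin.h>
#include <cstdio>

int main() {
  __m256i v = _mm256_set_epi64x(3, 2, 1, 0);  // 64-bit lanes: 0,1,2,3 low->high
  __m256i p = _mm256_permute4x64_epi64(v, 0b11011000);
  long long out[4];
  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), p);
  std::printf("%lld %lld %lld %lld\n", out[0], out[1], out[2], out[3]);  // 0 2 1 3
  return 0;
}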
@@ -226,6 +245,25 @@ struct VecConvert<
  }
};

template <typename src_t>
struct VecConvert<
    float,
    2,
    src_t,
    1,
    typename std::enable_if_t<is_8bit_integer_v<src_t>,
        void>> {
  static inline VectorizedN<float, 2> apply(const VectorizedN<src_t, 1>& src) {
    // Shuffle bits [127:64] of src[0] into bits [191:128] of shuffled
    __m256i shuffled = _mm256_permute4x64_epi64(src[0], 0b11011000);
    __m256i src2 = _mm256_castsi128_si256(
      _mm_castps_si128(
        _mm256_extractf128_ps(_mm256_castsi256_ps(shuffled), 1) // Extract the second 128-bit lane
      )
    );
    return VectorizedN<float, 2>(convert_int8_to_float<src_t>(src[0]), convert_int8_to_float<src_t>(src2));
  }
};
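// ---------------------------------------------------------------------------
// Editor's sketch (not part of the diff): the inverse direction above extracts
// the upper 128-bit lane after the permute. An integer analogue of that
// `_mm256_extractf128_ps(..., 1)` call, compiled with -mavx2:
// ---------------------------------------------------------------------------
#include <immintrin.h>
#include <cstdio>

int main() {
  __m256i v = _mm256_set_epi64x(3, 2, 1, 0);    // 64-bit lanes: 0,1,2,3
  __m128i hi = _mm256_extracti128_si256(v, 1);  // upper lane pair: 2,3
  long long out[2];
  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hi);
  std::printf("%lld %lld\n", out[0], out[1]);   // prints: 2 3
  return 0;
}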
template <typename dst_t>
struct VecConvert<
@@ -268,11 +306,10 @@ struct VecConvert<float, 1, BFloat16, 1> {
      const VectorizedN<BFloat16, 1>& src) {
    VectorizedN<float, 1> result;
    uint16x8_t u16_8 = vld1q_u16(reinterpret_cast<const uint16_t*>(&src[0]));
    int32x4_t shift = vdupq_n_s32(16);
    auto u16_low1 = vget_low_u16(u16_8);
    auto u16_high1 = vget_high_u16(u16_8);
    float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_low1), shift));
    float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_u32(vmovl_u16(u16_high1), shift));
    float32x4_t f32x4_0 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_low1), 16));
    float32x4_t f32x4_1 = vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(u16_high1), 16));
    result[0] = {f32x4_0, f32x4_1};
    return result;
  }
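// ---------------------------------------------------------------------------
// Editor's sketch (not part of the diff): the NEON kernel above widens
// bfloat16 to float32 by an immediate 16-bit left shift (vshlq_n_u32) instead
// of a shift-by-vector. The scalar model of that widening:
// ---------------------------------------------------------------------------
#include <cstdint>
#include <cstring>
#include <cstdio>

// A bfloat16 is the top 16 bits of a float32, so widening is a 16-bit left
// shift followed by a bit cast.
float bf16_bits_to_float(uint16_t bits) {
  uint32_t widened = static_cast<uint32_t>(bits) << 16;
  float f;
  std::memcpy(&f, &widened, sizeof(f));
  return f;
}

int main() {
  std::printf("%f\n", bf16_bits_to_float(0x3f80));  // 0x3f800000 -> 1.0
  return 0;
}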
@@ -216,27 +216,27 @@ public:
  }
  Vectorized<float> exp_u20() const {
    // A faster version of exp with ULP=20
    static __m256 vec_factorial_1 =
    const __m256 vec_factorial_1 =
        _mm256_set1_ps(0.999999701f); // 1/factorial(1)
    static __m256 vec_factorial_2 =
    const __m256 vec_factorial_2 =
        _mm256_set1_ps(0.499991506f); // 1/factorial(2)
    static __m256 vec_factorial_3 =
    const __m256 vec_factorial_3 =
        _mm256_set1_ps(0.166676521f); // 1/factorial(3)
    static __m256 vec_factorial_4 =
    const __m256 vec_factorial_4 =
        _mm256_set1_ps(0.0418978221f); // 1/factorial(4)
    static __m256 vec_factorial_5 =
    const __m256 vec_factorial_5 =
        _mm256_set1_ps(0.00828929059f); // 1/factorial(5)
    static __m256 vec_exp_log2ef =
    const __m256 vec_exp_log2ef =
        _mm256_castsi256_ps(_mm256_set1_epi32(0x3fb8aa3b)); // log2(e)
    static __m256 vec_half = _mm256_set1_ps(0.5f);
    static __m256 vec_one = _mm256_set1_ps(1.f);
    static __m256 vec_zero = _mm256_set1_ps(0.f);
    static __m256 vec_two = _mm256_set1_ps(2.f);
    static __m256 vec_ln2f = _mm256_castsi256_ps(_mm256_set1_epi32(0x3f317218)); // ln(2)
    static __m256 vec_ln_flt_min = _mm256_castsi256_ps(_mm256_set1_epi32(0xc2aeac50));
    static __m256 vec_ln_flt_max = _mm256_castsi256_ps(_mm256_set1_epi32(0x42b17218));
    static __m256i vec_127 = _mm256_set1_epi32(0x0000007f);
    static int n_mantissa_bits = 23;
    const __m256 vec_half = _mm256_set1_ps(0.5f);
    const __m256 vec_one = _mm256_set1_ps(1.f);
    const __m256 vec_zero = _mm256_set1_ps(0.f);
    const __m256 vec_two = _mm256_set1_ps(2.f);
    const __m256 vec_ln2f = _mm256_castsi256_ps(_mm256_set1_epi32(0x3f317218)); // ln(2)
    const __m256 vec_ln_flt_min = _mm256_castsi256_ps(_mm256_set1_epi32(0xc2aeac50));
    const __m256 vec_ln_flt_max = _mm256_castsi256_ps(_mm256_set1_epi32(0x42b17218));
    const __m256i vec_127 = _mm256_set1_epi32(0x0000007f);
    const int n_mantissa_bits = 23;

    // exp(x) =
    // = exp(n * ln(2) + r) // divide x by ln(2) and get quot and rem
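// ---------------------------------------------------------------------------
// Editor's sketch (not part of the diff): a scalar model of the range
// reduction sketched in the comment above: write x = n*ln(2) + r with
// n = round(x * log2(e)), then exp(x) = 2^n * p(r), where p is the degree-5
// polynomial with the 1/factorial coefficients from this hunk. (The actual
// kernel rounds via vec_half and rebuilds 2^n with vec_127 and the mantissa
// shift; this sketch uses nearbyint/ldexp for clarity.)
// ---------------------------------------------------------------------------
#include <cmath>
#include <cstdio>

float exp_u20_scalar(float x) {
  const float log2e = 1.442695041f;  // mirrors vec_exp_log2ef (0x3fb8aa3b)
  const float ln2 = 0.693147181f;    // mirrors vec_ln2f (0x3f317218)
  float n = std::nearbyint(x * log2e);  // quotient of x / ln(2), rounded
  float r = x - n * ln2;                // remainder, small around zero
  float p = 0.00828929059f;             // ~1/5!
  p = p * r + 0.0418978221f;            // ~1/4!
  p = p * r + 0.166676521f;             // ~1/3!
  p = p * r + 0.499991506f;             // ~1/2!
  p = p * r + 0.999999701f;             // ~1/1!
  p = p * r + 1.0f;
  return std::ldexp(p, static_cast<int>(n));  // p * 2^n
}

int main() {
  std::printf("approx=%f exact=%f\n", exp_u20_scalar(1.5f), std::exp(1.5f));
  return 0;
}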
@@ -75,7 +75,7 @@ inline __m256i pack_saturate_and_clamp<int32_t>(
    int32_t /*min_val*/,
    int32_t /*max_val*/) {
  // This function is for linkage only, will not be used
  AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
  TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
}

template <>
@@ -209,8 +209,25 @@ struct VecConvert<
            (is_reduced_floating_point_v<src_t> && is_8bit_integer_v<dst_t>),
        void>> {
  static inline VectorizedN<dst_t, 1> apply(const VectorizedN<src_t, 1>& src) {
    VectorizedN<float, 1> tmp_fp32 = VecConvert<float, 1, src_t, 1>::apply(src);
    return VecConvert<dst_t, 1, float, 1>::apply(tmp_fp32);
    VectorizedN<float, 2> tmp_fp32 = VecConvert<float, 2, src_t, 1>::apply(src);
    return VecConvert<dst_t, 1, float, 2>::apply(tmp_fp32);
  }
};

template <typename dst_t>
struct VecConvert<
    dst_t,
    1,
    float,
    2,
    typename std::enable_if_t<is_8bit_integer_v<dst_t>,
        void>> {
  static inline VectorizedN<dst_t, 1> apply(const VectorizedN<float, 2>& src) {
    at::vec::Vectorized<dst_t> vec1 = convert_float_to_int8<dst_t>(src[0]);
    at::vec::Vectorized<dst_t> vec2 = convert_float_to_int8<dst_t>(src[1]);
    __m128 lane2 = _mm512_castps512_ps128(_mm512_castsi512_ps(vec2));
    __m512 result = _mm512_insertf32x4(_mm512_castsi512_ps(vec1), lane2, 1); // Insert lane2 into the second 128-bit lane
    return at::vec::Vectorized<dst_t>(_mm512_castps_si512(result));
  }
};
@@ -227,6 +244,24 @@ struct VecConvert<
  }
};

template <typename src_t>
struct VecConvert<
    float,
    2,
    src_t,
    1,
    typename std::enable_if_t<is_8bit_integer_v<src_t>,
        void>> {
  static inline VectorizedN<float, 2> apply(const VectorizedN<src_t, 1>& src) {
    __m512i src2 = _mm512_castsi128_si512(
      _mm_castps_si128(
        _mm512_extractf32x4_ps(_mm512_castsi512_ps(src[0]), 1) // Extract the second 128-bit lane
      )
    );
    return VectorizedN<float, 2>(convert_int8_to_float<src_t>(src[0]), convert_int8_to_float<src_t>(src2));
  }
};

template <typename src_t>
struct VecConvert<
    float,
@@ -236,27 +236,27 @@ public:
  }
  Vectorized<float> exp_u20() const {
    // A faster version of exp with ULP=20
    static __m512 vec_factorial_1 =
    const __m512 vec_factorial_1 =
        _mm512_set1_ps(0.999999701f); // 1/factorial(1)
    static __m512 vec_factorial_2 =
    const __m512 vec_factorial_2 =
        _mm512_set1_ps(0.499991506f); // 1/factorial(2)
    static __m512 vec_factorial_3 =
    const __m512 vec_factorial_3 =
        _mm512_set1_ps(0.166676521f); // 1/factorial(3)
    static __m512 vec_factorial_4 =
    const __m512 vec_factorial_4 =
        _mm512_set1_ps(0.0418978221f); // 1/factorial(4)
    static __m512 vec_factorial_5 =
    const __m512 vec_factorial_5 =
        _mm512_set1_ps(0.00828929059f); // 1/factorial(5)
    static __m512 vec_exp_log2ef =
    const __m512 vec_exp_log2ef =
        _mm512_castsi512_ps(_mm512_set1_epi32(0x3fb8aa3b)); // log2(e)
    static __m512 vec_half = _mm512_set1_ps(0.5f);
    static __m512 vec_one = _mm512_set1_ps(1.f);
    static __m512 vec_zero = _mm512_set1_ps(0.f);
    static __m512 vec_two = _mm512_set1_ps(2.f);
    static __m512 vec_ln2f = _mm512_castsi512_ps(_mm512_set1_epi32(0x3f317218)); // ln(2)
    static __m512 vec_ln_flt_min = _mm512_castsi512_ps(_mm512_set1_epi32(0xc2aeac50));
    static __m512 vec_ln_flt_max = _mm512_castsi512_ps(_mm512_set1_epi32(0x42b17218));
    static __m512i vec_127 = _mm512_set1_epi32(0x0000007f);
    static int n_mantissa_bits = 23;
    const __m512 vec_half = _mm512_set1_ps(0.5f);
    const __m512 vec_one = _mm512_set1_ps(1.f);
    const __m512 vec_zero = _mm512_set1_ps(0.f);
    const __m512 vec_two = _mm512_set1_ps(2.f);
    const __m512 vec_ln2f = _mm512_castsi512_ps(_mm512_set1_epi32(0x3f317218)); // ln(2)
    const __m512 vec_ln_flt_min = _mm512_castsi512_ps(_mm512_set1_epi32(0xc2aeac50));
    const __m512 vec_ln_flt_max = _mm512_castsi512_ps(_mm512_set1_epi32(0x42b17218));
    const __m512i vec_127 = _mm512_set1_epi32(0x0000007f);
    const int n_mantissa_bits = 23;

    // exp(x) =
    // = exp(n * ln(2) + r) // divide x by ln(2) and get quot and rem
@@ -77,7 +77,7 @@ inline __m512i pack_saturate_and_clamp<int32_t>(
    int32_t min_val [[maybe_unused]],
    int32_t max_val [[maybe_unused]]) {
  // This function is for linkage only, will not be used
  AT_ERROR("pack_saturate_and_clamp<int32_t> is not supported");
  TORCH_CHECK(false, "pack_saturate_and_clamp<int32_t> is not supported");
  return __m512i{};
}
@@ -125,7 +125,7 @@ void CUDAGraph::capture_begin(MempoolId_t pool/*=0*/, cudaStreamCaptureMode capt
  // due to the capture status being updated _after_ a capture had already started.
  c10::cuda::CUDACachingAllocator::beginAllocateToPool(capture_dev_, mempool_id_, [this](cudaStream_t stream) {
      cudaStreamCaptureStatus status;
      CaptureId_t stream_capture_id;
      CaptureId_t stream_capture_id = 0;
      AT_CUDA_CHECK(cudaStreamGetCaptureInfo(stream, &status, &stream_capture_id));
      return status == cudaStreamCaptureStatus::cudaStreamCaptureStatusActive && stream_capture_id == capture_id_;
  });
@@ -160,7 +160,7 @@ void CUDAGraph::capture_end() {

  c10::cuda::CUDACachingAllocator::endAllocateToPool(capture_dev_, mempool_id_);

  TORCH_CHECK(graph_ != NULL, "Invalid capture.");
  TORCH_CHECK(graph_ != nullptr, "Invalid capture.");
  has_graph_ = true;

  // In typical graph usage some tensors (e.g. the tensors used for graph IO) are not freed
@@ -175,7 +175,7 @@ void CUDAGraph::capture_end() {
  // cudaGraphInstantiateWithFlags
  // https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html#group__CUDART__GRAPH_1ga2c652a24ba93e52b99a47bec0888233
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 11040)
  int version;
  int version = 0;
  AT_CUDA_CHECK(cudaDriverGetVersion(&version));
  if (version < 11040) {
#endif
@@ -203,7 +203,7 @@ void CUDAGraph::capture_end() {
  }

  size_t numCUDAGraphNodes = 0;
  AT_CUDA_CHECK(cudaGraphGetNodes(graph_, NULL, &numCUDAGraphNodes));
  AT_CUDA_CHECK(cudaGraphGetNodes(graph_, nullptr, &numCUDAGraphNodes));
  if (numCUDAGraphNodes == 0) {
      TORCH_WARN("The CUDA Graph is empty. This usually means that the graph was ",
                 "attempted to be captured on wrong device or stream.");
@@ -233,7 +233,7 @@ void CUDAGraph::replay() {
  // graph_exec_ may be replayed in any stream.
  AT_CUDA_CHECK(cudaGraphLaunch(graph_exec_, at::cuda::getCurrentCUDAStream()));

  int version;
  int version = 0;
  AT_CUDA_CHECK(cudaDriverGetVersion(&version));
  if (version < 11040) {
    // Workaround for bug in libcuda.so that causes replayed graphs with
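// ---------------------------------------------------------------------------
// Editor's sketch (not part of the diff): the CUDAGraph hunks above all apply
// the same fix — initialize an out-parameter before handing it to a CUDA API,
// so a failed call leaves a defined value. A minimal standalone form of that
// pattern, assuming a CUDA toolchain (compile with nvcc):
// ---------------------------------------------------------------------------
#include <cuda_runtime_api.h>
#include <cstdio>

int main() {
  int version = 0;  // defined even if the driver call fails
  if (cudaDriverGetVersion(&version) == cudaSuccess && version < 11040) {
    std::printf("driver %d predates CUDA 11.4\n", version);
  }
  return 0;
}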