[aoti] Add cpp loader

ghstack-source-id: ccb800e2667132afdd1ab6f2b974be635f581e24 Pull Request resolved: https://github.com/pytorch/pytorch/pull/134865
2025-10-28 02:04:53 +08:00 · 2024-08-30 14:26:09 -07:00
2477 changed files with 58839 additions and 106739 deletions
--- a/.buckconfig.oss
+++ b/.buckconfig.oss
@ -21,3 +21,6 @@
  cxx = /usr/bin/clang++
  cxxpp = /usr/bin/clang++
  ld = /usr/bin/clang++
+
+[project]
+  default_flavors_mode=all
--- a/.ci/docker/aotriton_version.txt
+++ b/.ci/docker/aotriton_version.txt
@ -1,5 +1,5 @@
-0.7b
+0.6b
 manylinux_2_17
 rocm6.2
-9be04068c3c0857a4cfd17d7e39e71d0423ebac2
-3e9e1959d23b93d78a08fcc5f868125dc3854dece32fd9458be9ef4467982291
+7f07e8a1cb1f99627eb6d77f5c0e9295c775f3c7
+e4ab195d2bd19e939c675a13280c29714c6ef9f2cf420690da150fa0cac043b1
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -236,7 +236,7 @@ case "$image" in
    TRITON=yes
    ;;
  pytorch-linux-focal-py3-clang10-onnx)
-    ANACONDA_PYTHON_VERSION=3.9
+    ANACONDA_PYTHON_VERSION=3.8
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
@ -245,7 +245,7 @@ case "$image" in
    ONNX=yes
    ;;
  pytorch-linux-focal-py3-clang9-android-ndk-r21e)
-    ANACONDA_PYTHON_VERSION=3.9
+    ANACONDA_PYTHON_VERSION=3.8
    CLANG_VERSION=9
    LLVMDEV=yes
    PROTOBUF=yes
@ -254,8 +254,8 @@ case "$image" in
    GRADLE_VERSION=6.8.3
    NINJA_VERSION=1.9.0
    ;;
-  pytorch-linux-focal-py3.9-clang10)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-focal-py3.8-clang10)
+    ANACONDA_PYTHON_VERSION=3.8
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
@ -276,8 +276,8 @@ case "$image" in
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
-  pytorch-linux-focal-py3.9-gcc9)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-focal-py3.8-gcc9)
+    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
@ -286,23 +286,23 @@ case "$image" in
    TRITON=yes
    ;;
  pytorch-linux-focal-rocm-n-1-py3)
-    ANACONDA_PYTHON_VERSION=3.10
+    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    ROCM_VERSION=6.1
+    ROCM_VERSION=6.0
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-rocm-n-py3)
-    ANACONDA_PYTHON_VERSION=3.10
+    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    ROCM_VERSION=6.2
+    ROCM_VERSION=6.1
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
@ -318,8 +318,8 @@ case "$image" in
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
-    pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
-    ANACONDA_PYTHON_VERSION=3.9
+    pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks)
+    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
@ -330,8 +330,8 @@ case "$image" in
    DOCS=yes
    INDUCTOR_BENCHMARKS=yes
    ;;
-  pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-clang12)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-jammy-cuda11.8-cudnn9-py3.8-clang12)
+    ANACONDA_PYTHON_VERSION=3.8
    CUDA_VERSION=11.8
    CUDNN_VERSION=9
    CLANG_VERSION=12
@ -355,14 +355,8 @@ case "$image" in
    CONDA_CMAKE=yes
    VISION=yes
    ;;
-  pytorch-linux-jammy-py3-clang18-asan)
-    ANACONDA_PYTHON_VERSION=3.10
-    CLANG_VERSION=18
-    CONDA_CMAKE=yes
-    VISION=yes
-    ;;
-  pytorch-linux-jammy-py3.9-gcc11)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-jammy-py3.8-gcc11)
+    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
@ -385,14 +379,6 @@ case "$image" in
    GCC_VERSION=11
    CONDA_CMAKE=yes
    HALIDE=yes
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-py3.12-triton-cpu)
-    CUDA_VERSION=12.4
-    ANACONDA_PYTHON_VERSION=3.12
-    GCC_VERSION=11
-    CONDA_CMAKE=yes
-    TRITON_CPU=yes
    ;;
  pytorch-linux-focal-linter)
    # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
@ -523,7 +509,6 @@ docker build \
       --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
       --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
       --build-arg "TRITON=${TRITON}" \
-       --build-arg "TRITON_CPU=${TRITON_CPU}" \
       --build-arg "ONNX=${ONNX}" \
       --build-arg "DOCS=${DOCS}" \
       --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
--- a/.ci/docker/centos-rocm/Dockerfile
+++ b/.ci/docker/centos-rocm/Dockerfile
@ -108,10 +108,10 @@ ENV CMAKE_C_COMPILER cc
 ENV CMAKE_CXX_COMPILER c++
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton.txt triton.txt
+COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt
 COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
+RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt

 # Install AOTriton (Early fail)
 COPY ./aotriton_version.txt aotriton_version.txt
--- a/.ci/docker/ci_commit_pins/executorch.txt
+++ b/.ci/docker/ci_commit_pins/executorch.txt
@ -1 +1 @@
-cd1c833b079adb324871dcbbe75b43d42ffc0ade
+69472e5c43481324ad923ceb29392ab72830acee
--- a/.ci/docker/ci_commit_pins/halide.txt
+++ b/.ci/docker/ci_commit_pins/halide.txt
@ -1 +1 @@
-461c12871f336fe6f57b55d6a297f13ef209161b
+340136fec6d3ebc73e7a19eba1663e9b0ba8ab2d
--- a/.ci/docker/ci_commit_pins/triton-cpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-cpu.txt
@ -1 +0,0 @@
-c7711371cace304afe265c1ffa906415ab82fc66
--- a/.ci/docker/ci_commit_pins/triton-rocm.txt
+++ b/.ci/docker/ci_commit_pins/triton-rocm.txt
@ -0,0 +1 @@
+21eae954efa5bf584da70324b640288c3ee7aede
--- a/.ci/docker/ci_commit_pins/triton-xpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-xpu.txt
@ -1 +1 @@
-91b14bf5593cf58a8541f3e6b9125600a867d4ef
+1b2f15840e0d70eec50d84c7a0575cb835524def
--- a/.ci/docker/ci_commit_pins/triton.txt
+++ b/.ci/docker/ci_commit_pins/triton.txt
@ -1 +1 @@
-cf34004b8a67d290a962da166f5aa2fc66751326
+dedb7bdf339a3546896d4820366ca562c586bfa0
--- a/.ci/docker/common/install_aotriton.sh
+++ b/.ci/docker/common/install_aotriton.sh
@ -4,12 +4,12 @@ set -ex

 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

-TARBALL='aotriton.tar.gz'
+TARBALL='aotriton.tar.bz2'
 # This read command alwasy returns with exit code 1
 read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true
 ARCH=$(uname -m)
 AOTRITON_INSTALL_PREFIX="$1"
-AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.gz"
+AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.bz2"

 cd "${AOTRITON_INSTALL_PREFIX}"
 # Must use -L to follow redirects
--- a/.ci/docker/common/install_clang.sh
+++ b/.ci/docker/common/install_clang.sh
@ -13,18 +13,11 @@ if [ -n "$CLANG_VERSION" ]; then
  elif [[ $UBUNTU_VERSION == 22.04 ]]; then
    # work around ubuntu apt-get conflicts
    sudo apt-get -y -f install
-    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add  -
-    if [[ $CLANG_VERSION == 18 ]]; then
-      apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
-    fi
  fi

  sudo apt-get update
-  if [[ $CLANG_VERSION -ge 18 ]]; then
-    apt-get install -y libomp-${CLANG_VERSION}-dev libclang-rt-${CLANG_VERSION}-dev clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
-  else
-    apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
-  fi
+  apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
+  apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"

  # Install dev version of LLVM.
  if [ -n "$LLVMDEV" ]; then
--- a/.ci/docker/common/install_conda.sh
+++ b/.ci/docker/common/install_conda.sh
@ -5,22 +5,32 @@ set -ex
 # Optionally install conda
 if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  BASE_URL="https://repo.anaconda.com/miniconda"
-  CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
-  if [[ $(uname -m) == "aarch64" ]] || [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
-    BASE_URL="https://github.com/conda-forge/miniforge/releases/latest/download"
-    CONDA_FILE="Miniforge3-Linux-$(uname -m).sh"
-  fi

  MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
  MINOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 2)

+if [[ $(uname -m) == "aarch64" ]]; then
+  BASE_URL="https://github.com/conda-forge/miniforge/releases/latest/download"
  case "$MAJOR_PYTHON_VERSION" in
-    3);;
+    3)
+      CONDA_FILE="Miniforge3-Linux-aarch64.sh"
+    ;;
    *)
      echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
      exit 1
      ;;
  esac
+else
+  case "$MAJOR_PYTHON_VERSION" in
+    3)
+      CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
+    ;;
+    *)
+      echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
+      exit 1
+      ;;
+  esac
+fi

  mkdir -p /opt/conda
  chown jenkins:jenkins /opt/conda
@ -65,10 +75,23 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
  if [[ $(uname -m) == "aarch64" ]]; then
-    conda_install "openblas==0.3.25=*openmp*"
+    CONDA_COMMON_DEPS="astunparse pyyaml setuptools openblas==0.3.25=*openmp* ninja==1.11.1 scons==4.5.2"
+
+    if [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
+      NUMPY_VERSION=1.24.4
+    else
+      NUMPY_VERSION=1.26.2
+    fi
  else
-    conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
+    CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"
+
+    if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.13" ]; then
+      NUMPY_VERSION=1.26.0
+    else
+      NUMPY_VERSION=1.21.2
+    fi
  fi
+  conda_install ${CONDA_COMMON_DEPS}

  # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
  # and libpython-static for torch deploy
@ -90,6 +113,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

  # Install some other packages, including those needed for Python test reporting
  pip_install -r /opt/conda/requirements-ci.txt
+  pip_install numpy=="$NUMPY_VERSION"
+  pip_install -U scikit-learn

  if [ -n "$DOCS" ]; then
    apt-get update
--- a/.ci/docker/common/install_cpython.sh
+++ b/.ci/docker/common/install_cpython.sh
@ -7,7 +7,7 @@ PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/hea
 GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py

 # Python versions to be installed in /opt/$VERSION_NO
-CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"}
+CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0"}

 function check_var {
    if [ -z "$1" ]; then
@ -22,13 +22,6 @@ function do_cpython_build {
    check_var $py_ver
    check_var $py_folder
    tar -xzf Python-$py_ver.tgz
-
-    local additional_flags=""
-    if [ "$py_ver" == "3.13.0t" ]; then
-        additional_flags=" --disable-gil"
-        mv cpython-3.13/ cpython-3.13t/
-    fi
-
    pushd $py_folder

    local prefix="/opt/_internal/cpython-${py_ver}"
@ -44,10 +37,8 @@ function do_cpython_build {
        local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto"
    fi

-
-
    # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6
-    CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} ${additional_flags} > /dev/null
+    CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} > /dev/null

    make -j40 > /dev/null
    make install > /dev/null
@ -78,14 +69,7 @@ function build_cpython {
    check_var $py_ver
    check_var $PYTHON_DOWNLOAD_URL
    local py_ver_folder=$py_ver
-
-    if [ "$py_ver" = "3.13.0t" ]; then
-        PY_VER_SHORT="3.13"
-        PYT_VER_SHORT="3.13t"
-        check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH
-        wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz
-        do_cpython_build $py_ver cpython-$PYT_VER_SHORT
-    elif [ "$py_ver" = "3.13.0" ]; then
+    if [ "$py_ver" = "3.13.0" ]; then
        PY_VER_SHORT="3.13"
        check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH
        wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz
--- a/.ci/docker/common/install_cuda.sh
+++ b/.ci/docker/common/install_cuda.sh
@ -105,7 +105,7 @@ function install_121 {
 }

 function install_124 {
-  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
+  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
  rm -rf /usr/local/cuda-12.4 /usr/local/cuda
  # install CUDA 12.4.1 in the same container
  wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run
--- a/.ci/docker/common/install_cuda_aarch64.sh
+++ b/.ci/docker/common/install_cuda_aarch64.sh
@ -5,19 +5,19 @@ set -ex

 NCCL_VERSION=v2.21.5-1

-function install_cusparselt_062 {
+function install_cusparselt_052 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    mkdir tmp_cusparselt && pushd tmp_cusparselt
-    wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
-    tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
-    cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/
-    cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
+    tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
+    cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/
+    cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
    popd
    rm -rf tmp_cusparselt
 }

 function install_124 {
-  echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
+  echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
  rm -rf /usr/local/cuda-12.4 /usr/local/cuda
  # install CUDA 12.4.1 in the same container
  wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
@ -44,7 +44,7 @@ function install_124 {
  cd ..
  rm -rf nccl

-  install_cusparselt_062
+  install_cusparselt_052

  ldconfig
 }
--- a/.ci/docker/common/install_cusparselt.sh
+++ b/.ci/docker/common/install_cusparselt.sh
@ -5,7 +5,7 @@ set -ex
 # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
 mkdir tmp_cusparselt && cd tmp_cusparselt

-if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
+if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-4]$ ]]; then
    arch_path='sbsa'
    export TARGETARCH=${TARGETARCH:-$(uname -m)}
    if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
--- a/.ci/docker/common/install_miopen.sh
+++ b/.ci/docker/common/install_miopen.sh
@ -10,21 +10,6 @@ if [[ -z $ROCM_VERSION ]]; then
    exit 1;
 fi

-IS_UBUNTU=0
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-case "$ID" in
-  ubuntu)
-    IS_UBUNTU=1
-    ;;
-  centos)
-    IS_UBUNTU=0
-    ;;
-  *)
-    echo "Unable to determine OS..."
-    exit 1
-    ;;
-esac
-
 # To make version comparison easier, create an integer representation.
 save_IFS="$IFS"
 IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION})
@ -72,11 +57,9 @@ MIOPEN_CMAKE_COMMON_FLAGS="
 -DMIOPEN_BUILD_DRIVER=OFF
 "
 # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
-if [[ $ROCM_INT -ge 60300 ]]; then
-    echo "ROCm 6.3+ MIOpen does not need any patches, do not build from source"
+if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
+    echo "ROCm 6.2 MIOpen does not need any patches, do not build from source"
    exit 0
-elif [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
-    MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
 elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
    echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
    exit 0
@ -110,21 +93,12 @@ else
    exit 1
 fi

-
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  apt-get remove -y miopen-hip
-else
-  yum remove -y miopen-hip
-fi
+yum remove -y miopen-hip

 git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
 pushd MIOpen
 # remove .git to save disk space since CI runner was running out
 rm -rf .git
-# Don't build CK to save docker build time
-if [[ $ROCM_INT -ge 60200 ]]; then
-    sed -i '/composable_kernel/d' requirements.txt
-fi
 # Don't build MLIR to save docker build time
 # since we are disabling MLIR backend for MIOpen anyway
 if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
@ -137,15 +111,10 @@ cmake -P install_deps.cmake --minimum

 # clean up since CI runner was running out of disk space
 rm -rf /tmp/*
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  apt-get autoclean && apt-get clean
-  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-else
-  yum clean all
-  rm -rf /var/cache/yum
-  rm -rf /var/lib/yum/yumdb
-  rm -rf /var/lib/yum/history
-fi
+yum clean all
+rm -rf /var/cache/yum
+rm -rf /var/lib/yum/yumdb
+rm -rf /var/lib/yum/history

 ## Build MIOpen
 mkdir -p build
@ -162,11 +131,7 @@ make -j $(nproc) package
 # clean up since CI runner was running out of disk space
 rm -rf /usr/local/cget

-if [[ ${IS_UBUNTU} == 1 ]]; then
-  sudo dpkg -i miopen-hip*.deb
-else
-  yum install -y miopen-*.rpm
-fi
+yum install -y miopen-*.rpm

 popd
 rm -rf MIOpen
--- a/.ci/docker/common/install_onnx.sh
+++ b/.ci/docker/common/install_onnx.sh
@ -15,7 +15,7 @@ pip_install \
  flatbuffers==2.0 \
  mock==5.0.1 \
  ninja==1.10.2 \
-  networkx==2.5 \
+  networkx==2.0 \
  numpy==1.24.2

 # ONNXRuntime should be installed before installing
@ -30,9 +30,10 @@ pip_install \

 pip_install coloredlogs packaging

-pip_install onnxruntime==1.18.1
-pip_install onnx==1.16.2
-pip_install onnxscript==0.1.0.dev20241009 --no-deps
+pip_install onnxruntime==1.18
+pip_install onnx==1.16.0
+# pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@3e869ef8ccf19b5ebd21c10d3e9c267c9a9fa729" --no-deps
+pip_install onnxscript==0.1.0.dev20240613 --no-deps
 # required by onnxscript
 pip_install ml_dtypes

--- a/.ci/docker/common/install_triton.sh
+++ b/.ci/docker/common/install_triton.sh
@ -12,14 +12,14 @@ conda_reinstall() {
  as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y --force-reinstall $*
 }

-if [ -n "${XPU_VERSION}" ]; then
+if [ -n "${ROCM_VERSION}" ]; then
+  TRITON_REPO="https://github.com/openai/triton"
+  TRITON_TEXT_FILE="triton-rocm"
+elif [ -n "${XPU_VERSION}" ]; then
  TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
  TRITON_TEXT_FILE="triton-xpu"
-elif [ -n "${TRITON_CPU}" ]; then
-  TRITON_REPO="https://github.com/triton-lang/triton-cpu"
-  TRITON_TEXT_FILE="triton-cpu"
 else
-  TRITON_REPO="https://github.com/triton-lang/triton"
+  TRITON_REPO="https://github.com/openai/triton"
  TRITON_TEXT_FILE="triton"
 fi

@ -47,10 +47,9 @@ chown -R jenkins /var/lib/jenkins/triton
 chgrp -R jenkins /var/lib/jenkins/triton
 pushd /var/lib/jenkins/

-as_jenkins git clone --recursive ${TRITON_REPO} triton
+as_jenkins git clone ${TRITON_REPO} triton
 cd triton
 as_jenkins git checkout ${TRITON_PINNED_COMMIT}
-as_jenkins git submodule update --init --recursive
 cd python

 # TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
--- a/.ci/docker/conda/build.sh
+++ b/.ci/docker/conda/build.sh
@ -37,12 +37,6 @@ esac

 (
  set -x
-  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-  sudo systemctl daemon-reload
-  sudo systemctl restart docker
-
  docker build \
    --target final \
    --progress plain \
--- a/.ci/docker/manywheel/Dockerfile
+++ b/.ci/docker/manywheel/Dockerfile
@ -10,7 +10,6 @@ ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US.UTF-8

 ARG DEVTOOLSET_VERSION=9
-
 # Note: This is required patch since CentOS have reached EOL
 # otherwise any yum install setp will fail
 RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
--- a/.ci/docker/manywheel/build.sh
+++ b/.ci/docker/manywheel/build.sh
@ -124,14 +124,7 @@ if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
 fi
 (
    set -x
-
-    # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-    # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-    sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-    sudo systemctl daemon-reload
-    sudo systemctl restart docker
-
-    DOCKER_BUILDKIT=1 docker build  \
+    DOCKER_BUILDKIT=1 docker build \
        ${DOCKER_GPU_BUILD_ARG} \
        --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
        --target "${TARGET}" \
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@ -30,14 +30,9 @@ dill==0.3.7
 #Pinned versions: 0.3.7
 #test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py

-expecttest==0.2.1
+expecttest==0.1.6
 #Description: method for writing tests where test framework auto populates
 # the expected output based on previous runs
-#Pinned versions: 0.2.1
-#test that import:
-
-fbscribelogger==0.1.6
-#Description: write to scribe from authenticated jobs on CI
 #Pinned versions: 0.1.6
 #test that import:

@ -90,7 +85,7 @@ librosa>=0.6.2 ; python_version < "3.11"
 #Pinned versions:
 #test that import:

-mypy==1.11.2
+mypy==1.10.0
 # Pin MyPy version because new errors are likely to appear with each release
 #Description: linter
 #Pinned versions: 1.10.0
@ -109,7 +104,7 @@ networkx==2.8.8
 #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py

 numba==0.49.0 ; python_version < "3.9"
-numba==0.55.2 ; python_version == "3.9"
+numba==0.54.1 ; python_version == "3.9"
 numba==0.55.2 ; python_version == "3.10"
 #Description: Just-In-Time Compiler for Numerical Functions
 #Pinned versions: 0.54.1, 0.49.0, <=0.49.1
@ -118,7 +113,7 @@ numba==0.55.2 ; python_version == "3.10"

 #numpy
 #Description: Provides N-dimensional arrays and linear algebra
-#Pinned versions: 1.26.2
+#Pinned versions: 1.20
 #test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
 #test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
 #test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
@ -128,10 +123,6 @@ numba==0.55.2 ; python_version == "3.10"
 #test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
 #test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
 #test_binary_ufuncs.py
-numpy==1.21.2; python_version == "3.9"
-numpy==1.22.4; python_version == "3.10"
-numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
-numpy==2.1.2; python_version >= "3.13"

 #onnxruntime
 #Description: scoring engine for Open Neural Network Exchange (ONNX) models
@ -143,9 +134,9 @@ opt-einsum==3.3
 #Pinned versions: 3.3
 #test that import: test_linalg.py

-optree==0.13.0
+optree==0.12.1
 #Description: A library for tree manipulation
-#Pinned versions: 0.13.0
+#Pinned versions: 0.12.1
 #test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
 #test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
 #common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
@ -326,6 +317,7 @@ lxml==5.0.0

 PyGithub==2.3.0

+sympy==1.12.1 ; python_version == "3.8"
 sympy==1.13.1 ; python_version >= "3.9"
 #Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
 #Pinned versions:
@ -340,31 +332,3 @@ onnxscript==0.1.0.dev20240817
 #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
 #Pinned versions:
 #test that import:
-
-parameterized==0.8.1
-#Description: Parameterizes unittests, both the tests themselves and the entire testing class
-#Pinned versions:
-#test that import:
-
-#Description: required for testing torch/distributed/_tools/sac_estimator.py
-#Pinned versions: 1.24.0
-#test that import: test_sac_estimator.py
-
-pwlf==2.2.1 ; python_version >= "3.8"
-#Description: required for testing torch/distributed/_tools/sac_estimator.py
-#Pinned versions: 2.2.1
-#test that import: test_sac_estimator.py
-
-
-# To build PyTorch itself
-astunparse
-PyYAML
-setuptools
-
-ninja==1.11.1 ; platform_machine == "aarch64"
-scons==4.5.2 ; platform_machine == "aarch64"
-
-pulp==2.9.0 ; python_version >= "3.8"
-#Description: required for testing ilp formulaiton under torch/distributed/_tools
-#Pinned versions: 2.9.0
-#test that import: test_sac_ilp.py
--- a/.ci/docker/triton_version.txt
+++ b/.ci/docker/triton_version.txt
@ -1 +1 @@
-3.1.0
+3.0.0
--- a/.ci/docker/ubuntu-rocm/Dockerfile
+++ b/.ci/docker/ubuntu-rocm/Dockerfile
@ -68,8 +68,6 @@ RUN rm install_rocm.sh
 COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh
 RUN rm install_rocm_magma.sh
-ADD ./common/install_miopen.sh install_miopen.sh
-RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
 ENV ROCM_PATH /opt/rocm
 ENV PATH /opt/rocm/bin:$PATH
 ENV PATH /opt/rocm/hcc/bin:$PATH
@ -102,10 +100,10 @@ ARG TRITON
 # try to reach out to S3, which docker build runners don't have access
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton.txt triton.txt
+COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt
 COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
+RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt

 # Install AOTriton
 COPY ./aotriton_version.txt aotriton_version.txt
@ -123,8 +121,5 @@ RUN bash ./install_cache.sh && rm install_cache.sh
 ARG BUILD_ENVIRONMENT
 ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

-# Install LLVM dev version (Defined in the pytorch/builder github repository)
-COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
-
 USER jenkins
 CMD ["bash"]
--- a/.ci/docker/ubuntu-xpu/Dockerfile
+++ b/.ci/docker/ubuntu-xpu/Dockerfile
@ -30,7 +30,6 @@ RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
 ARG ANACONDA_PYTHON_VERSION
 ARG CONDA_CMAKE
 ARG DOCS
-ARG BUILD_ENVIRONMENT
 ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
 ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
 ENV DOCS=$DOCS
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@ -147,13 +147,6 @@ COPY ci_commit_pins/triton.txt triton.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
 RUN rm install_triton.sh common_utils.sh triton.txt

-ARG TRITON_CPU
-COPY ./common/install_triton.sh install_triton.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
-RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton-cpu.txt
-
 ARG EXECUTORCH
 # Build and install executorch
 COPY ./common/install_executorch.sh install_executorch.sh
--- a/.ci/libtorch/build.sh
+++ b/.ci/libtorch/build.sh
@ -1,10 +0,0 @@
-#!/usr/bin/env bash
-
-# This is mostly just a shim to manywheel/build.sh
-# TODO: Make this a dedicated script to build just libtorch
-
-set -ex
-
-SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh
--- a/.ci/manywheel/LICENSE
+++ b/.ci/manywheel/LICENSE
@ -1,21 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2016 manylinux
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
--- a/.ci/manywheel/build.sh
+++ b/.ci/manywheel/build.sh
@ -1,25 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-case "${GPU_ARCH_TYPE:-BLANK}" in
-    BLANK)
-        # Legacy behavior for CircleCI
-        bash "${SCRIPTPATH}/build_cuda.sh"
-        ;;
-    cuda)
-        bash "${SCRIPTPATH}/build_cuda.sh"
-        ;;
-    rocm)
-        bash "${SCRIPTPATH}/build_rocm.sh"
-        ;;
-    cpu | cpu-cxx11-abi | cpu-s390x | xpu)
-        bash "${SCRIPTPATH}/build_cpu.sh"
-        ;;
-    *)
-        echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..."
-        exit 1
-        ;;
-esac
--- a/.ci/manywheel/build_common.sh
+++ b/.ci/manywheel/build_common.sh
@ -1,505 +0,0 @@
-#!/usr/bin/env bash
-# meant to be called only from the neighboring build.sh and build_cpu.sh scripts
-
-set -ex
-SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
-
-
-# Require only one python installation
-if [[ -z "$DESIRED_PYTHON" ]]; then
-    echo "Need to set DESIRED_PYTHON env variable"
-    exit 1
-fi
-if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then
-    echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set"
-    echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps"
-    exit 1
-fi
-
-# Function to retry functions that sometimes timeout or have flaky failures
-retry () {
-    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-}
-
-# TODO move this into the Docker images
-OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
-if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
-    retry yum install -q -y zip openssl
-elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
-    retry yum install -q -y zip openssl
-elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
-    retry dnf install -q -y zip openssl
-elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-    # TODO: Remove this once nvidia package repos are back online
-    # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
-    # shellcheck disable=SC2046
-    sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
-
-    retry apt-get update
-    retry apt-get -y install zip openssl
-fi
-
-# We use the package name to test the package by passing this to 'pip install'
-# This is the env variable that setup.py uses to name the package. Note that
-# pip 'normalizes' the name first by changing all - to _
-if [[ -z "$TORCH_PACKAGE_NAME" ]]; then
-    TORCH_PACKAGE_NAME='torch'
-fi
-
-if [[ -z "$TORCH_NO_PYTHON_PACKAGE_NAME" ]]; then
-    TORCH_NO_PYTHON_PACKAGE_NAME='torch_no_python'
-fi
-
-TORCH_PACKAGE_NAME="$(echo $TORCH_PACKAGE_NAME | tr '-' '_')"
-TORCH_NO_PYTHON_PACKAGE_NAME="$(echo $TORCH_NO_PYTHON_PACKAGE_NAME | tr '-' '_')"
-echo "Expecting the built wheels to all be called '$TORCH_PACKAGE_NAME' or '$TORCH_NO_PYTHON_PACKAGE_NAME'"
-
-# Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if
-# PYTORCH_BUILD_NUMBER > 1
-build_version="$PYTORCH_BUILD_VERSION"
-build_number="$PYTORCH_BUILD_NUMBER"
-if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then
-    # This will be the *exact* version, since build_number<1
-    build_version="$OVERRIDE_PACKAGE_VERSION"
-    build_number=0
-fi
-if [[ -z "$build_version" ]]; then
-    build_version=1.0.0
-fi
-if [[ -z "$build_number" ]]; then
-    build_number=1
-fi
-export PYTORCH_BUILD_VERSION=$build_version
-export PYTORCH_BUILD_NUMBER=$build_number
-
-export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH"
-export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH"
-
-if [[ -e /opt/openssl ]]; then
-    export OPENSSL_ROOT_DIR=/opt/openssl
-    export CMAKE_INCLUDE_PATH="/opt/openssl/include":$CMAKE_INCLUDE_PATH
-fi
-
-# If given a python version like 3.6m or 2.7mu, convert this to the format we
-# expect. The binary CI jobs pass in python versions like this; they also only
-# ever pass one python version, so we assume that DESIRED_PYTHON is not a list
-# in this case
-if [[ -n "$DESIRED_PYTHON" && $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then
-    python_digits="$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
-    py_majmin="${DESIRED_PYTHON}"
-    DESIRED_PYTHON="cp${python_digits}-cp${python_digits}t"
-elif [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then
-    python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)"
-    DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}"
-    if [[ ${python_nodot} -ge 310 ]]; then
-        py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:2}"
-    else
-        py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:1}"
-    fi
-fi
-
-pydir="/opt/python/$DESIRED_PYTHON"
-export PATH="$pydir/bin:$PATH"
-echo "Will build for Python version: ${DESIRED_PYTHON} with ${python_installation}"
-
-mkdir -p /tmp/$WHEELHOUSE_DIR
-
-export PATCHELF_BIN=/usr/local/bin/patchelf
-patchelf_version=$($PATCHELF_BIN --version)
-echo "patchelf version: " $patchelf_version
-if [[ "$patchelf_version" == "patchelf 0.9" ]]; then
-    echo "Your patchelf version is too old. Please use version >= 0.10."
-    exit 1
-fi
-
-########################################################
-# Compile wheels as well as libtorch
-#######################################################
-if [[ -z "$PYTORCH_ROOT" ]]; then
-    echo "Need to set PYTORCH_ROOT env variable"
-    exit 1
-fi
-pushd "$PYTORCH_ROOT"
-python setup.py clean
-retry pip install -qr requirements.txt
-case ${DESIRED_PYTHON} in
-  cp31*)
-    retry pip install -q --pre numpy==2.1.0
-    ;;
-  # Should catch 3.9+
-  *)
-    retry pip install -q --pre numpy==2.0.2
-    ;;
-esac
-
-if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
-    export _GLIBCXX_USE_CXX11_ABI=1
-else
-    export _GLIBCXX_USE_CXX11_ABI=0
-fi
-
-if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
-    echo "Calling build_amd.py at $(date)"
-    python tools/amd_build/build_amd.py
-fi
-
-# This value comes from binary_linux_build.sh (and should only be set to true
-# for master / release branches)
-BUILD_DEBUG_INFO=${BUILD_DEBUG_INFO:=0}
-
-if [[ $BUILD_DEBUG_INFO == "1" ]]; then
-    echo "Building wheel and debug info"
-else
-    echo "BUILD_DEBUG_INFO was not set, skipping debug info"
-fi
-
-if [[ "$DISABLE_RCCL" = 1 ]]; then
-    echo "Disabling NCCL/RCCL in pyTorch"
-    USE_RCCL=0
-    USE_NCCL=0
-    USE_KINETO=0
-else
-    USE_RCCL=1
-    USE_NCCL=1
-    USE_KINETO=1
-fi
-
-echo "Calling setup.py bdist at $(date)"
-
-if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
-    echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
-    time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
-    BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 \
-    BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
-    USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
-    python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
-    echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
-    echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
-    time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
-    BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 \
-    BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
-    USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
-    python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR --cmake
-    echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
-else
-    time CMAKE_ARGS=${CMAKE_ARGS[@]} \
-        EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
-        BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
-        USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
-        python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
-fi
-echo "Finished setup.py bdist at $(date)"
-
-# Build libtorch packages
-if [[ -n "$BUILD_PYTHONLESS" ]]; then
-    # Now build pythonless libtorch
-    # Note - just use whichever python we happen to be on
-    python setup.py clean
-
-    if [[ $LIBTORCH_VARIANT = *"static"* ]]; then
-        STATIC_CMAKE_FLAG="-DTORCH_STATIC=1"
-    fi
-
-    mkdir -p build
-    pushd build
-    echo "Calling tools/build_libtorch.py at $(date)"
-    time CMAKE_ARGS=${CMAKE_ARGS[@]} \
-         EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \
-         python ../tools/build_libtorch.py
-    echo "Finished tools/build_libtorch.py at $(date)"
-    popd
-
-    mkdir -p libtorch/{lib,bin,include,share}
-    cp -r build/build/lib libtorch/
-
-    # for now, the headers for the libtorch package will just be copied in
-    # from one of the wheels (this is from when this script built multiple
-    # wheels at once)
-    ANY_WHEEL=$(ls /tmp/$WHEELHOUSE_DIR/torch*.whl | head -n1)
-    unzip -d any_wheel $ANY_WHEEL
-    if [[ -d any_wheel/torch/include ]]; then
-        cp -r any_wheel/torch/include libtorch/
-    else
-        cp -r any_wheel/torch/lib/include libtorch/
-    fi
-    cp -r any_wheel/torch/share/cmake libtorch/share/
-    rm -rf any_wheel
-
-    echo $PYTORCH_BUILD_VERSION > libtorch/build-version
-    echo "$(pushd $PYTORCH_ROOT && git rev-parse HEAD)" > libtorch/build-hash
-
-    mkdir -p /tmp/$LIBTORCH_HOUSE_DIR
-
-    if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
-        LIBTORCH_ABI="cxx11-abi-"
-    else
-        LIBTORCH_ABI=
-    fi
-
-    zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
-    cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
-       /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
-fi
-
-popd
-
-#######################################################################
-# ADD DEPENDENCIES INTO THE WHEEL
-#
-# auditwheel repair doesn't work correctly and is buggy
-# so manually do the work of copying dependency libs and patchelfing
-# and fixing RECORDS entries correctly
-######################################################################
-
-fname_with_sha256() {
-    HASH=$(sha256sum $1 | cut -c1-8)
-    DIRNAME=$(dirname $1)
-    BASENAME=$(basename $1)
-    # Do not rename nvrtc-builtins.so as they are dynamically loaded
-    # by libnvrtc.so
-    # Similarly don't mangle libcudnn and libcublas library names
-    if [[ $BASENAME == "libnvrtc-builtins.s"* || $BASENAME == "libcudnn"* || $BASENAME == "libcublas"*  ]]; then
-        echo $1
-    else
-        INITNAME=$(echo $BASENAME | cut -f1 -d".")
-        ENDNAME=$(echo $BASENAME | cut -f 2- -d".")
-        echo "$DIRNAME/$INITNAME-$HASH.$ENDNAME"
-    fi
-}
-
-fname_without_so_number() {
-    LINKNAME=$(echo $1 | sed -e 's/\.so.*/.so/g')
-    echo "$LINKNAME"
-}
-
-make_wheel_record() {
-    FPATH=$1
-    if echo $FPATH | grep RECORD >/dev/null 2>&1; then
-        # if the RECORD file, then
-        echo "$FPATH,,"
-    else
-        HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
-        FSIZE=$(ls -nl $FPATH | awk '{print $5}')
-        echo "$FPATH,sha256=$HASH,$FSIZE"
-    fi
-}
-
-replace_needed_sofiles() {
-    find $1 -name '*.so*' | while read sofile; do
-        origname=$2
-        patchedname=$3
-        if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
-            set +e
-            origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
-            ERRCODE=$?
-            set -e
-            if [ "$ERRCODE" -eq "0" ]; then
-                echo "patching $sofile entry $origname to $patchedname"
-                $PATCHELF_BIN --replace-needed $origname $patchedname $sofile
-            fi
-        fi
-    done
-}
-
-echo 'Built this wheel:'
-ls /tmp/$WHEELHOUSE_DIR
-mkdir -p "/$WHEELHOUSE_DIR"
-mv /tmp/$WHEELHOUSE_DIR/torch*linux*.whl /$WHEELHOUSE_DIR/
-
-if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
-    mv /tmp/$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/ || true
-fi
-
-if [[ -n "$BUILD_PYTHONLESS" ]]; then
-    mkdir -p /$LIBTORCH_HOUSE_DIR
-    mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
-    rm -rf /tmp/$LIBTORCH_HOUSE_DIR
-fi
-rm -rf /tmp/$WHEELHOUSE_DIR
-rm -rf /tmp_dir
-mkdir /tmp_dir
-pushd /tmp_dir
-
-for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do
-
-    # if the glob didn't match anything
-    if [[ ! -e $pkg ]]; then
-        continue
-    fi
-
-    rm -rf tmp
-    mkdir -p tmp
-    cd tmp
-    cp $pkg .
-
-    unzip -q $(basename $pkg)
-    rm -f $(basename $pkg)
-
-    if [[ -d torch ]]; then
-        PREFIX=torch
-    else
-        PREFIX=libtorch
-    fi
-
-    if [[ $pkg != *"without-deps"* ]]; then
-        # copy over needed dependent .so files over and tag them with their hash
-        patched=()
-        for filepath in "${DEPS_LIST[@]}"; do
-            filename=$(basename $filepath)
-            destpath=$PREFIX/lib/$filename
-            if [[ "$filepath" != "$destpath" ]]; then
-                cp $filepath $destpath
-            fi
-
-            # ROCm workaround for roctracer dlopens
-            if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
-                patchedpath=$(fname_without_so_number $destpath)
-            # Keep the so number for XPU dependencies
-            elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then
-                patchedpath=$destpath
-            else
-                patchedpath=$(fname_with_sha256 $destpath)
-            fi
-            patchedname=$(basename $patchedpath)
-            if [[ "$destpath" != "$patchedpath" ]]; then
-                mv $destpath $patchedpath
-            fi
-            patched+=("$patchedname")
-            echo "Copied $filepath to $patchedpath"
-        done
-
-        echo "patching to fix the so names to the hashed names"
-        for ((i=0;i<${#DEPS_LIST[@]};++i)); do
-            replace_needed_sofiles $PREFIX ${DEPS_SONAME[i]} ${patched[i]}
-            # do the same for caffe2, if it exists
-            if [[ -d caffe2 ]]; then
-                replace_needed_sofiles caffe2 ${DEPS_SONAME[i]} ${patched[i]}
-            fi
-        done
-
-        # copy over needed auxiliary files
-        for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do
-            srcpath=${DEPS_AUX_SRCLIST[i]}
-            dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]}
-            mkdir -p $(dirname $dstpath)
-            cp $srcpath $dstpath
-        done
-    fi
-
-    # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib
-    find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-        echo "Setting rpath of $sofile to ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'}"
-        $PATCHELF_BIN --set-rpath ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'} ${FORCE_RPATH:-} $sofile
-        $PATCHELF_BIN --print-rpath $sofile
-    done
-
-    # set RPATH of lib/ files to $ORIGIN
-    find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-        echo "Setting rpath of $sofile to ${LIB_SO_RPATH:-'$ORIGIN'}"
-        $PATCHELF_BIN --set-rpath ${LIB_SO_RPATH:-'$ORIGIN'} ${FORCE_RPATH:-} $sofile
-        $PATCHELF_BIN --print-rpath $sofile
-    done
-
-    # regenerate the RECORD file with new hashes
-    record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g')
-    if [[ -e $record_file ]]; then
-        echo "Generating new record file $record_file"
-        : > "$record_file"
-        # generate records for folders in wheel
-        find * -type f | while read fname; do
-            make_wheel_record "$fname" >>"$record_file"
-        done
-    fi
-
-    if [[ $BUILD_DEBUG_INFO == "1" ]]; then
-        pushd "$PREFIX/lib"
-
-        # Duplicate library into debug lib
-        cp libtorch_cpu.so libtorch_cpu.so.dbg
-
-        # Keep debug symbols on debug lib
-        strip --only-keep-debug libtorch_cpu.so.dbg
-
-        # Remove debug info from release lib
-        strip --strip-debug libtorch_cpu.so
-
-        objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg
-
-        # Zip up debug info
-        mkdir -p /tmp/debug
-        mv libtorch_cpu.so.dbg /tmp/debug/libtorch_cpu.so.dbg
-        CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch_cpu.so)
-
-        pushd /tmp
-        PKG_NAME=$(basename "$pkg" | sed 's/\.whl$//g')
-        zip /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip /tmp/debug/libtorch_cpu.so.dbg
-        cp /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip "$PYTORCH_FINAL_PACKAGE_DIR"
-        popd
-
-        popd
-    fi
-
-    # zip up the wheel back
-    zip -rq $(basename $pkg) $PREIX*
-
-    # replace original wheel
-    rm -f $pkg
-    mv $(basename $pkg) $pkg
-    cd ..
-    rm -rf tmp
-done
-
-# Copy wheels to host machine for persistence before testing
-if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
-    mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
-    if [[ -n "$BUILD_PYTHONLESS" ]]; then
-        cp /$LIBTORCH_HOUSE_DIR/libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
-    else
-        cp /$WHEELHOUSE_DIR/torch*.whl "$PYTORCH_FINAL_PACKAGE_DIR"
-    fi
-fi
-
-# remove stuff before testing
-rm -rf /opt/rh
-if ls /usr/local/cuda* >/dev/null 2>&1; then
-    rm -rf /usr/local/cuda*
-fi
-
-
-# Test that all the wheels work
-if [[ -z "$BUILD_PYTHONLESS" ]]; then
-  export OMP_NUM_THREADS=4 # on NUMA machines this takes too long
-  pushd $PYTORCH_ROOT/test
-
-  # Install the wheel for this Python version
-  if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
-    pip uninstall -y "$TORCH_NO_PYTHON_PACKAGE_NAME" || true
-  fi
-
-  pip uninstall -y "$TORCH_PACKAGE_NAME"
-
-  if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
-    pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
-  fi
-
-  pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
-
-  # Print info on the libraries installed in this wheel
-  # Rather than adjust find command to skip non-library files with an embedded *.so* in their name,
-  # since this is only for reporting purposes, we add the || true to the ldd command.
-  installed_libraries=($(find "$pydir/lib/python${py_majmin}/site-packages/torch/" -name '*.so*'))
-  echo "The wheel installed all of the libraries: ${installed_libraries[@]}"
-  for installed_lib in "${installed_libraries[@]}"; do
-      ldd "$installed_lib" || true
-  done
-
-  # Run the tests
-  echo "$(date) :: Running tests"
-  pushd "$PYTORCH_ROOT"
-
-  #TODO: run_tests.sh and check_binary.sh should be moved to pytorch/pytorch project
-  LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
-          "/builder/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
-  popd
-  echo "$(date) :: Finished tests"
-fi
--- a/.ci/manywheel/build_cpu.sh
+++ b/.ci/manywheel/build_cpu.sh
@ -1,99 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
-
-export TH_BINARY_BUILD=1
-export USE_CUDA=0
-
-# Keep an array of cmake variables to add to
-if [[ -z "$CMAKE_ARGS" ]]; then
-    # These are passed to tools/build_pytorch_libs.sh::build()
-    CMAKE_ARGS=()
-fi
-if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
-    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
-    EXTRA_CAFFE2_CMAKE_FLAGS=()
-fi
-
-DIR_SUFFIX=cpu
-if [[ "$GPU_ARCH_TYPE" == "xpu" ]]; then
-    DIR_SUFFIX=xpu
-    # Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html
-    source /opt/intel/oneapi/pytorch-gpu-dev-0.5/oneapi-vars.sh
-    source /opt/intel/oneapi/pti/latest/env/vars.sh
-    export USE_STATIC_MKL=1
-fi
-
-WHEELHOUSE_DIR="wheelhouse$DIR_SUFFIX"
-LIBTORCH_HOUSE_DIR="libtorch_house$DIR_SUFFIX"
-if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
-    if [[ -z "$BUILD_PYTHONLESS" ]]; then
-        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$DIR_SUFFIX"
-    else
-        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$DIR_SUFFIX"
-    fi
-fi
-mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
-
-OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
-if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
-    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
-elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
-    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
-elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
-    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
-elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-    if [[ "$(uname -m)" == "s390x" ]]; then
-        LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
-    else
-        LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
-    fi
-fi
-
-DEPS_LIST=(
-    "$LIBGOMP_PATH"
-)
-
-DEPS_SONAME=(
-    "libgomp.so.1"
-)
-
-if [[ "$GPU_ARCH_TYPE" == "xpu" ]]; then
-    echo "Bundling with xpu support package libs."
-    DEPS_LIST+=(
-        "/opt/intel/oneapi/compiler/latest/lib/libsycl-preview.so.7"
-        "/opt/intel/oneapi/compiler/latest/lib/libOpenCL.so.1"
-        "/opt/intel/oneapi/compiler/latest/lib/libxptifw.so"
-        "/opt/intel/oneapi/compiler/latest/lib/libsvml.so"
-        "/opt/intel/oneapi/compiler/latest/lib/libirng.so"
-        "/opt/intel/oneapi/compiler/latest/lib/libimf.so"
-        "/opt/intel/oneapi/compiler/latest/lib/libintlc.so.5"
-        "/opt/intel/oneapi/compiler/latest/lib/libpi_level_zero.so"
-        "/opt/intel/oneapi/pti/latest/lib/libpti_view.so.0.9"
-        "/opt/intel/oneapi/pti/latest/lib/libpti.so.0.9"
-    )
-    DEPS_SONAME+=(
-        "libsycl-preview.so.7"
-        "libOpenCL.so.1"
-        "libxptifw.so"
-        "libsvml.so"
-        "libirng.so"
-        "libimf.so"
-        "libintlc.so.5"
-        "libpi_level_zero.so"
-        "libpti_view.so.0.9"
-        "libpti.so.0.9"
-    )
-fi
-
-rm -rf /usr/local/cuda*
-
-SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
-if [[ -z "$BUILD_PYTHONLESS" ]]; then
-    BUILD_SCRIPT=build_common.sh
-else
-    BUILD_SCRIPT=build_libtorch.sh
-fi
-source ${SOURCE_DIR}/${BUILD_SCRIPT}
--- a/.ci/manywheel/build_cuda.sh
+++ b/.ci/manywheel/build_cuda.sh
@ -1,290 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P ))"
-
-export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
-export NCCL_ROOT_DIR=/usr/local/cuda
-export TH_BINARY_BUILD=1
-export USE_STATIC_CUDNN=1
-export USE_STATIC_NCCL=1
-export ATEN_STATIC_CUDA=1
-export USE_CUDA_STATIC_LINK=1
-export INSTALL_TEST=0 # dont install test binaries into site-packages
-export USE_CUPTI_SO=0
-export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
-
-# Keep an array of cmake variables to add to
-if [[ -z "$CMAKE_ARGS" ]]; then
-    # These are passed to tools/build_pytorch_libs.sh::build()
-    CMAKE_ARGS=()
-fi
-if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
-    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
-    EXTRA_CAFFE2_CMAKE_FLAGS=()
-fi
-
-# Determine CUDA version and architectures to build for
-#
-# NOTE: We should first check `DESIRED_CUDA` when determining `CUDA_VERSION`,
-# because in some cases a single Docker image can have multiple CUDA versions
-# on it, and `nvcc --version` might not show the CUDA version we want.
-if [[ -n "$DESIRED_CUDA" ]]; then
-    # If the DESIRED_CUDA already matches the format that we expect
-    if [[ ${DESIRED_CUDA} =~ ^[0-9]+\.[0-9]+$ ]]; then
-        CUDA_VERSION=${DESIRED_CUDA}
-    else
-        # cu90, cu92, cu100, cu101
-        if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
-            CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
-        elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then
-            CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}"
-        fi
-    fi
-    echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
-
-    # There really has to be a better way to do this - eli
-    # Possibly limiting builds to specific cuda versions be delimiting images would be a choice
-    if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-        echo "Switching to CUDA version ${DESIRED_CUDA}"
-        /builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
-    fi
-else
-    CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
-    echo "CUDA $CUDA_VERSION Detected"
-fi
-
-cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
-
-TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
-case ${CUDA_VERSION} in
-    12.4)
-        if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
-            TORCH_CUDA_ARCH_LIST="9.0"
-        else
-            TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
-        fi
-        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
-        ;;
-    12.1)
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
-        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
-        ;;
-    11.8)
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0"
-        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
-        ;;
-    11.[67])
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7"
-        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
-        ;;
-    *)
-        echo "unknown cuda version $CUDA_VERSION"
-        exit 1
-        ;;
-esac
-
-export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
-echo "${TORCH_CUDA_ARCH_LIST}"
-
-# Package directories
-WHEELHOUSE_DIR="wheelhouse$cuda_version_nodot"
-LIBTORCH_HOUSE_DIR="libtorch_house$cuda_version_nodot"
-if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
-    if [[ -z "$BUILD_PYTHONLESS" ]]; then
-        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$cuda_version_nodot"
-    else
-        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$cuda_version_nodot"
-    fi
-fi
-mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
-
-OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
-if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
-    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
-elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
-    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
-elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
-    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
-elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-    LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
-fi
-
-DEPS_LIST=(
-    "$LIBGOMP_PATH"
-)
-DEPS_SONAME=(
-    "libgomp.so.1"
-)
-
-if [[ $USE_CUSPARSELT == "1" ]]; then
-        DEPS_SONAME+=(
-            "libcusparseLt.so.0"
-        )
-        DEPS_LIST+=(
-            "/usr/local/cuda/lib64/libcusparseLt.so.0"
-        )
-fi
-
-if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then
-    export USE_STATIC_CUDNN=0
-    # Try parallelizing nvcc as well
-    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
-
-    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
-        echo "Bundling with cudnn and cublas."
-        DEPS_LIST+=(
-            "/usr/local/cuda/lib64/libcudnn_adv.so.9"
-            "/usr/local/cuda/lib64/libcudnn_cnn.so.9"
-            "/usr/local/cuda/lib64/libcudnn_graph.so.9"
-            "/usr/local/cuda/lib64/libcudnn_ops.so.9"
-            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9"
-            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
-            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
-            "/usr/local/cuda/lib64/libcudnn.so.9"
-            "/usr/local/cuda/lib64/libcublas.so.12"
-            "/usr/local/cuda/lib64/libcublasLt.so.12"
-            "/usr/local/cuda/lib64/libcudart.so.12"
-            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-            "/usr/local/cuda/lib64/libnvrtc.so.12"
-            "/usr/local/cuda/lib64/libnvrtc-builtins.so"
-        )
-        DEPS_SONAME+=(
-            "libcudnn_adv.so.9"
-            "libcudnn_cnn.so.9"
-            "libcudnn_graph.so.9"
-            "libcudnn_ops.so.9"
-            "libcudnn_engines_runtime_compiled.so.9"
-            "libcudnn_engines_precompiled.so.9"
-            "libcudnn_heuristic.so.9"
-            "libcudnn.so.9"
-            "libcublas.so.12"
-            "libcublasLt.so.12"
-            "libcudart.so.12"
-            "libnvToolsExt.so.1"
-            "libnvrtc.so.12"
-            "libnvrtc-builtins.so"
-        )
-    else
-        echo "Using nvidia libs from pypi."
-        CUDA_RPATHS=(
-            '$ORIGIN/../../nvidia/cublas/lib'
-            '$ORIGIN/../../nvidia/cuda_cupti/lib'
-            '$ORIGIN/../../nvidia/cuda_nvrtc/lib'
-            '$ORIGIN/../../nvidia/cuda_runtime/lib'
-            '$ORIGIN/../../nvidia/cudnn/lib'
-            '$ORIGIN/../../nvidia/cufft/lib'
-            '$ORIGIN/../../nvidia/curand/lib'
-            '$ORIGIN/../../nvidia/cusolver/lib'
-            '$ORIGIN/../../nvidia/cusparse/lib'
-            '$ORIGIN/../../nvidia/nccl/lib'
-            '$ORIGIN/../../nvidia/nvtx/lib'
-        )
-        CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
-        export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
-        export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
-        export FORCE_RPATH="--force-rpath"
-        export USE_STATIC_NCCL=0
-        export USE_SYSTEM_NCCL=1
-        export ATEN_STATIC_CUDA=0
-        export USE_CUDA_STATIC_LINK=0
-        export USE_CUPTI_SO=1
-        export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
-        export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
-    fi
-elif [[ $CUDA_VERSION == "11.8" ]]; then
-    export USE_STATIC_CUDNN=0
-    # Try parallelizing nvcc as well
-    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
-    # Bundle ptxas into the wheel, see https://github.com/pytorch/pytorch/pull/119750
-    export BUILD_BUNDLE_PTXAS=1
-
-    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
-        echo "Bundling with cudnn and cublas."
-        DEPS_LIST+=(
-            "/usr/local/cuda/lib64/libcudnn_adv.so.9"
-            "/usr/local/cuda/lib64/libcudnn_cnn.so.9"
-            "/usr/local/cuda/lib64/libcudnn_graph.so.9"
-            "/usr/local/cuda/lib64/libcudnn_ops.so.9"
-            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9"
-            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
-            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
-            "/usr/local/cuda/lib64/libcudnn.so.9"
-            "/usr/local/cuda/lib64/libcublas.so.11"
-            "/usr/local/cuda/lib64/libcublasLt.so.11"
-            "/usr/local/cuda/lib64/libcudart.so.11.0"
-            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
-            "/usr/local/cuda/lib64/libnvrtc.so.11.2"    # this is not a mistake, it links to more specific cuda version
-            "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
-        )
-        DEPS_SONAME+=(
-            "libcudnn_adv.so.9"
-            "libcudnn_cnn.so.9"
-            "libcudnn_graph.so.9"
-            "libcudnn_ops.so.9"
-            "libcudnn_engines_runtime_compiled.so.9"
-            "libcudnn_engines_precompiled.so.9"
-            "libcudnn_heuristic.so.9"
-            "libcudnn.so.9"
-            "libcublas.so.11"
-            "libcublasLt.so.11"
-            "libcudart.so.11.0"
-            "libnvToolsExt.so.1"
-            "libnvrtc.so.11.2"
-            "libnvrtc-builtins.so.11.8"
-        )
-    else
-        echo "Using nvidia libs from pypi."
-        CUDA_RPATHS=(
-            '$ORIGIN/../../nvidia/cublas/lib'
-            '$ORIGIN/../../nvidia/cuda_cupti/lib'
-            '$ORIGIN/../../nvidia/cuda_nvrtc/lib'
-            '$ORIGIN/../../nvidia/cuda_runtime/lib'
-            '$ORIGIN/../../nvidia/cudnn/lib'
-            '$ORIGIN/../../nvidia/cufft/lib'
-            '$ORIGIN/../../nvidia/curand/lib'
-            '$ORIGIN/../../nvidia/cusolver/lib'
-            '$ORIGIN/../../nvidia/cusparse/lib'
-            '$ORIGIN/../../nvidia/nccl/lib'
-            '$ORIGIN/../../nvidia/nvtx/lib'
-        )
-        CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
-        export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
-        export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
-        export FORCE_RPATH="--force-rpath"
-        export USE_STATIC_NCCL=0
-        export USE_SYSTEM_NCCL=1
-        export ATEN_STATIC_CUDA=0
-        export USE_CUDA_STATIC_LINK=0
-        export USE_CUPTI_SO=1
-        export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
-        export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
-    fi
-else
-    echo "Unknown cuda version $CUDA_VERSION"
-    exit 1
-fi
-
-# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
-export DESIRED_CUDA="$cuda_version_nodot"
-
-# Switch `/usr/local/cuda` to the desired CUDA version
-rm -rf /usr/local/cuda || true
-ln -s "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda
-
-# Switch `/usr/local/magma` to the desired CUDA version
-rm -rf /usr/local/magma || true
-ln -s /usr/local/cuda-${CUDA_VERSION}/magma /usr/local/magma
-
-export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) # 10.0.130
-export CUDA_VERSION_SHORT=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev | cut -f1,2 -d".") # 10.0
-export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev)
-
-SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
-if [[ -z "$BUILD_PYTHONLESS" ]]; then
-    BUILD_SCRIPT=build_common.sh
-else
-    BUILD_SCRIPT=build_libtorch.sh
-fi
-source $SCRIPTPATH/${BUILD_SCRIPT}
--- a/.ci/manywheel/build_libtorch.sh
+++ b/.ci/manywheel/build_libtorch.sh
@ -1,353 +0,0 @@
-#!/usr/bin/env bash
-# meant to be called only from the neighboring build.sh and build_cpu.sh scripts
-
-set -e pipefail
-SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
-
-# Require only one python installation
-if [[ -z "$DESIRED_PYTHON" ]]; then
-    echo "Need to set DESIRED_PYTHON env variable"
-    exit 1
-fi
-if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then
-    echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set"
-    echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps"
-    exit 1
-fi
-
-# Function to retry functions that sometimes timeout or have flaky failures
-retry () {
-    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-}
-
-# TODO move this into the Docker images
-OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
-if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
-    retry yum install -q -y zip openssl
-elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
-    retry yum install -q -y zip openssl
-elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
-    retry dnf install -q -y zip openssl
-elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-    # TODO: Remove this once nvidia package repos are back online
-    # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
-    # shellcheck disable=SC2046
-    sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
-    retry apt-get update
-    retry apt-get -y install zip openssl
-fi
-
-# Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if
-# PYTORCH_BUILD_NUMBER > 1
-build_version="$PYTORCH_BUILD_VERSION"
-build_number="$PYTORCH_BUILD_NUMBER"
-if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then
-    # This will be the *exact* version, since build_number<1
-    build_version="$OVERRIDE_PACKAGE_VERSION"
-    build_number=0
-fi
-if [[ -z "$build_version" ]]; then
-    build_version=1.0.0
-fi
-if [[ -z "$build_number" ]]; then
-    build_number=1
-fi
-export PYTORCH_BUILD_VERSION=$build_version
-export PYTORCH_BUILD_NUMBER=$build_number
-
-export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH"
-export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH"
-
-# set OPENSSL_ROOT_DIR=/opt/openssl if it exists
-if [[ -e /opt/openssl ]]; then
-    export OPENSSL_ROOT_DIR=/opt/openssl
-    export CMAKE_INCLUDE_PATH="/opt/openssl/include":$CMAKE_INCLUDE_PATH
-fi
-
-# If given a python version like 3.6m or 2.7mu, convert this to the format we
-# expect. The binary CI jobs pass in python versions like this; they also only
-# ever pass one python version, so we assume that DESIRED_PYTHON is not a list
-# in this case
-if [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then
-    python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)"
-    DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}"
-fi
-pydir="/opt/python/$DESIRED_PYTHON"
-export PATH="$pydir/bin:$PATH"
-
-export PATCHELF_BIN=/usr/local/bin/patchelf
-patchelf_version=`$PATCHELF_BIN --version`
-echo "patchelf version: " $patchelf_version
-if [[ "$patchelf_version" == "patchelf 0.9" ]]; then
-    echo "Your patchelf version is too old. Please use version >= 0.10."
-    exit 1
-fi
-
-########################################################
-# Compile wheels as well as libtorch
-#######################################################
-if [[ -z "$PYTORCH_ROOT" ]]; then
-    echo "Need to set PYTORCH_ROOT env variable"
-    exit 1
-fi
-pushd "$PYTORCH_ROOT"
-python setup.py clean
-retry pip install -qr requirements.txt
-retry pip install -q numpy==2.0.1
-
-if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
-    export _GLIBCXX_USE_CXX11_ABI=1
-else
-    export _GLIBCXX_USE_CXX11_ABI=0
-fi
-
-if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
-    echo "Calling build_amd.py at $(date)"
-    python tools/amd_build/build_amd.py
-    # TODO remove this work-around once pytorch sources are updated
-    export ROCclr_DIR=/opt/rocm/rocclr/lib/cmake/rocclr
-fi
-
-echo "Calling setup.py install at $(date)"
-
-if [[ $LIBTORCH_VARIANT = *"static"* ]]; then
-    STATIC_CMAKE_FLAG="-DTORCH_STATIC=1"
-fi
-
-(
-    set -x
-
-    mkdir -p build
-
-    time CMAKE_ARGS=${CMAKE_ARGS[@]} \
-        EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \
-        # TODO: Remove this flag once https://github.com/pytorch/pytorch/issues/55952 is closed
-        CFLAGS='-Wno-deprecated-declarations' \
-        BUILD_LIBTORCH_CPU_WITH_DEBUG=1 \
-        python setup.py install
-
-    mkdir -p libtorch/{lib,bin,include,share}
-
-    # Make debug folder separate so it doesn't get zipped up with the rest of
-    # libtorch
-    mkdir debug
-
-    # Copy over all lib files
-    cp -rv build/lib/*                libtorch/lib/
-    cp -rv build/lib*/torch/lib/*     libtorch/lib/
-
-    # Copy over all include files
-    cp -rv build/include/*            libtorch/include/
-    cp -rv build/lib*/torch/include/* libtorch/include/
-
-    # Copy over all of the cmake files
-    cp -rv build/lib*/torch/share/*   libtorch/share/
-
-    # Split libtorch into debug / release version
-    cp libtorch/lib/libtorch_cpu.so libtorch/lib/libtorch_cpu.so.dbg
-
-    # Keep debug symbols on debug lib
-    strip --only-keep-debug libtorch/lib/libtorch_cpu.so.dbg
-
-    # Remove debug info from release lib
-    strip --strip-debug libtorch/lib/libtorch_cpu.so
-
-    # Add a debug link to the release lib to the debug lib (debuggers will then
-    # search for symbols in a file called libtorch_cpu.so.dbg in some
-    # predetermined locations) and embed a CRC32 of the debug library into the .so
-    cd libtorch/lib
-
-    objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg
-    cd ../..
-
-    # Move the debug symbols to its own directory so it doesn't get processed /
-    # zipped with all the other libraries
-    mv libtorch/lib/libtorch_cpu.so.dbg debug/libtorch_cpu.so.dbg
-
-    echo "${PYTORCH_BUILD_VERSION}" > libtorch/build-version
-    echo "$(pushd $PYTORCH_ROOT && git rev-parse HEAD)" > libtorch/build-hash
-
-)
-
-if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
-    LIBTORCH_ABI="cxx11-abi-"
-else
-    LIBTORCH_ABI=
-fi
-
-(
-    set -x
-
-    mkdir -p /tmp/$LIBTORCH_HOUSE_DIR
-
-    # objcopy installs a CRC32 into libtorch_cpu above so, so add that to the name here
-    CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch/lib/libtorch_cpu.so)
-
-    # Zip debug symbols
-    zip /tmp/$LIBTORCH_HOUSE_DIR/debug-libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION-$CRC32.zip debug/libtorch_cpu.so.dbg
-
-    # Zip and copy libtorch
-    zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
-    cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
-       /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
-)
-
-
-popd
-
-#######################################################################
-# ADD DEPENDENCIES INTO THE WHEEL
-#
-# auditwheel repair doesn't work correctly and is buggy
-# so manually do the work of copying dependency libs and patchelfing
-# and fixing RECORDS entries correctly
-######################################################################
-
-fname_with_sha256() {
-    HASH=$(sha256sum $1 | cut -c1-8)
-    DIRNAME=$(dirname $1)
-    BASENAME=$(basename $1)
-    if [[ $BASENAME == "libnvrtc-builtins.so" || $BASENAME == "libcudnn"* ]]; then
-        echo $1
-    else
-        INITNAME=$(echo $BASENAME | cut -f1 -d".")
-        ENDNAME=$(echo $BASENAME | cut -f 2- -d".")
-        echo "$DIRNAME/$INITNAME-$HASH.$ENDNAME"
-    fi
-}
-
-fname_without_so_number() {
-    LINKNAME=$(echo $1 | sed -e 's/\.so.*/.so/g')
-    echo "$LINKNAME"
-}
-
-make_wheel_record() {
-    FPATH=$1
-    if echo $FPATH | grep RECORD >/dev/null 2>&1; then
-        # if the RECORD file, then
-        echo "$FPATH,,"
-    else
-        HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
-        FSIZE=$(ls -nl $FPATH | awk '{print $5}')
-        echo "$FPATH,sha256=$HASH,$FSIZE"
-    fi
-}
-
-echo 'Built this package:'
-(
-    set -x
-    mkdir -p /$LIBTORCH_HOUSE_DIR
-    mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
-    rm -rf /tmp/$LIBTORCH_HOUSE_DIR
-)
-TMP_DIR=$(mktemp -d)
-trap "rm -rf ${TMP_DIR}" EXIT
-pushd "${TMP_DIR}"
-
-for pkg in /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do
-
-    # if the glob didn't match anything
-    if [[ ! -e $pkg ]]; then
-        continue
-    fi
-
-    rm -rf tmp
-    mkdir -p tmp
-    cd tmp
-    cp $pkg .
-
-    unzip -q $(basename $pkg)
-    rm -f $(basename $pkg)
-
-    PREFIX=libtorch
-
-    if [[ $pkg != *"without-deps"* ]]; then
-        # copy over needed dependent .so files over and tag them with their hash
-        patched=()
-        for filepath in "${DEPS_LIST[@]}"; do
-            filename=$(basename $filepath)
-            destpath=$PREFIX/lib/$filename
-            if [[ "$filepath" != "$destpath" ]]; then
-                cp $filepath $destpath
-            fi
-
-            if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
-                patchedpath=$(fname_without_so_number $destpath)
-            else
-                patchedpath=$(fname_with_sha256 $destpath)
-            fi
-            patchedname=$(basename $patchedpath)
-            if [[ "$destpath" != "$patchedpath" ]]; then
-                mv $destpath $patchedpath
-            fi
-            patched+=("$patchedname")
-            echo "Copied $filepath to $patchedpath"
-        done
-
-        echo "patching to fix the so names to the hashed names"
-        for ((i=0;i<${#DEPS_LIST[@]};++i)); do
-            find $PREFIX -name '*.so*' | while read sofile; do
-                origname=${DEPS_SONAME[i]}
-                patchedname=${patched[i]}
-                if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
-                    set +e
-                    origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
-                    ERRCODE=$?
-                    set -e
-                    if [ "$ERRCODE" -eq "0" ]; then
-                        echo "patching $sofile entry $origname to $patchedname"
-                        $PATCHELF_BIN --replace-needed $origname $patchedname $sofile
-                    fi
-                fi
-            done
-        done
-
-        # copy over needed auxiliary files
-        for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do
-            srcpath=${DEPS_AUX_SRCLIST[i]}
-            dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]}
-            mkdir -p $(dirname $dstpath)
-            cp $srcpath $dstpath
-        done
-    fi
-
-    # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib
-    find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-        echo "Setting rpath of $sofile to " '$ORIGIN:$ORIGIN/lib'
-        $PATCHELF_BIN --set-rpath '$ORIGIN:$ORIGIN/lib' $sofile
-        $PATCHELF_BIN --print-rpath $sofile
-    done
-
-    # set RPATH of lib/ files to $ORIGIN
-    find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
-        echo "Setting rpath of $sofile to " '$ORIGIN'
-        $PATCHELF_BIN --set-rpath '$ORIGIN' $sofile
-        $PATCHELF_BIN --print-rpath $sofile
-    done
-
-    # regenerate the RECORD file with new hashes
-    record_file=`echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g'`
-    if [[ -e $record_file ]]; then
-        echo "Generating new record file $record_file"
-        rm -f $record_file
-        # generate records for folders in wheel
-        find * -type f | while read fname; do
-            echo $(make_wheel_record $fname) >>$record_file
-        done
-    fi
-
-    # zip up the wheel back
-    zip -rq $(basename $pkg) $PREFIX*
-
-    # replace original wheel
-    rm -f $pkg
-    mv $(basename $pkg) $pkg
-    cd ..
-    rm -rf tmp
-done
-
-# Copy wheels to host machine for persistence before testing
-if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
-    cp /$LIBTORCH_HOUSE_DIR/libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
-    cp /$LIBTORCH_HOUSE_DIR/debug-libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
-fi
--- a/.ci/manywheel/build_rocm.sh
+++ b/.ci/manywheel/build_rocm.sh
@ -1,263 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-export ROCM_HOME=/opt/rocm
-export MAGMA_HOME=$ROCM_HOME/magma
-# TODO: libtorch_cpu.so is broken when building with Debug info
-export BUILD_DEBUG_INFO=0
-
-# TODO Are these all used/needed?
-export TH_BINARY_BUILD=1
-export USE_STATIC_CUDNN=1
-export USE_STATIC_NCCL=1
-export ATEN_STATIC_CUDA=1
-export USE_CUDA_STATIC_LINK=1
-export INSTALL_TEST=0 # dont install test binaries into site-packages
-# Set RPATH instead of RUNPATH when using patchelf to avoid LD_LIBRARY_PATH override
-export FORCE_RPATH="--force-rpath"
-
-# Keep an array of cmake variables to add to
-if [[ -z "$CMAKE_ARGS" ]]; then
-    # These are passed to tools/build_pytorch_libs.sh::build()
-    CMAKE_ARGS=()
-fi
-if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
-    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
-    EXTRA_CAFFE2_CMAKE_FLAGS=()
-fi
-
-# Determine ROCm version and architectures to build for
-#
-# NOTE: We should first check `DESIRED_CUDA` when determining `ROCM_VERSION`
-if [[ -n "$DESIRED_CUDA" ]]; then
-    if ! echo "${DESIRED_CUDA}"| grep "^rocm" >/dev/null 2>/dev/null; then
-        export DESIRED_CUDA="rocm${DESIRED_CUDA}"
-    fi
-    # rocm3.7, rocm3.5.1
-    ROCM_VERSION="$DESIRED_CUDA"
-    echo "Using $ROCM_VERSION as determined by DESIRED_CUDA"
-else
-    echo "Must set DESIRED_CUDA"
-    exit 1
-fi
-
-# Package directories
-WHEELHOUSE_DIR="wheelhouse$ROCM_VERSION"
-LIBTORCH_HOUSE_DIR="libtorch_house$ROCM_VERSION"
-if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
-    if [[ -z "$BUILD_PYTHONLESS" ]]; then
-        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$ROCM_VERSION"
-    else
-        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$ROCM_VERSION"
-    fi
-fi
-mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
-
-# To make version comparison easier, create an integer representation.
-ROCM_VERSION_CLEAN=$(echo ${ROCM_VERSION} | sed s/rocm//)
-save_IFS="$IFS"
-IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION_CLEAN})
-IFS="$save_IFS"
-if [[ ${#ROCM_VERSION_ARRAY[@]} == 2 ]]; then
-    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
-    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
-    ROCM_VERSION_PATCH=0
-elif [[ ${#ROCM_VERSION_ARRAY[@]} == 3 ]]; then
-    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
-    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
-    ROCM_VERSION_PATCH=${ROCM_VERSION_ARRAY[2]}
-else
-    echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
-    exit 1
-fi
-ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
-
-# Required ROCm libraries
-ROCM_SO_FILES=(
-    "libMIOpen.so"
-    "libamdhip64.so"
-    "libhipblas.so"
-    "libhipfft.so"
-    "libhiprand.so"
-    "libhipsolver.so"
-    "libhipsparse.so"
-    "libhsa-runtime64.so"
-    "libamd_comgr.so"
-    "libmagma.so"
-    "librccl.so"
-    "librocblas.so"
-    "librocfft.so"
-    "librocm_smi64.so"
-    "librocrand.so"
-    "librocsolver.so"
-    "librocsparse.so"
-    "libroctracer64.so"
-    "libroctx64.so"
-    "libhipblaslt.so"
-    "libhiprtc.so"
-)
-
-if [[ $ROCM_INT -ge 60100 ]]; then
-    ROCM_SO_FILES+=("librocprofiler-register.so")
-fi
-
-if [[ $ROCM_INT -ge 60200 ]]; then
-    ROCM_SO_FILES+=("librocm-core.so")
-fi
-
-OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
-if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
-    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
-    LIBNUMA_PATH="/usr/lib64/libnuma.so.1"
-    LIBELF_PATH="/usr/lib64/libelf.so.1"
-    LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
-    LIBDRM_PATH="/opt/amdgpu/lib64/libdrm.so.2"
-    LIBDRM_AMDGPU_PATH="/opt/amdgpu/lib64/libdrm_amdgpu.so.1"
-    if [[ $ROCM_INT -ge 60100 ]]; then
-        # Below libs are direct dependencies of libhipsolver
-        LIBSUITESPARSE_CONFIG_PATH="/lib64/libsuitesparseconfig.so.4"
-        LIBCHOLMOD_PATH="/lib64/libcholmod.so.2"
-        # Below libs are direct dependencies of libcholmod
-        LIBAMD_PATH="/lib64/libamd.so.2"
-        LIBCAMD_PATH="/lib64/libcamd.so.2"
-        LIBCCOLAMD_PATH="/lib64/libccolamd.so.2"
-        LIBCOLAMD_PATH="/lib64/libcolamd.so.2"
-        LIBSATLAS_PATH="/lib64/atlas/libsatlas.so.3"
-        # Below libs are direct dependencies of libsatlas
-        LIBGFORTRAN_PATH="/lib64/libgfortran.so.3"
-        LIBQUADMATH_PATH="/lib64/libquadmath.so.0"
-    fi
-    MAYBE_LIB64=lib64
-elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-    LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
-    LIBNUMA_PATH="/usr/lib/x86_64-linux-gnu/libnuma.so.1"
-    LIBELF_PATH="/usr/lib/x86_64-linux-gnu/libelf.so.1"
-    if [[ $ROCM_INT -ge 50300 ]]; then
-        LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.6"
-    else
-        LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.5"
-    fi
-    LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2"
-    LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1"
-    if [[ $ROCM_INT -ge 60100 ]]; then
-        # Below libs are direct dependencies of libhipsolver
-        LIBCHOLMOD_PATH="/lib/x86_64-linux-gnu/libcholmod.so.3"
-        # Below libs are direct dependencies of libcholmod
-        LIBSUITESPARSE_CONFIG_PATH="/lib/x86_64-linux-gnu/libsuitesparseconfig.so.5"
-        LIBAMD_PATH="/lib/x86_64-linux-gnu/libamd.so.2"
-        LIBCAMD_PATH="/lib/x86_64-linux-gnu/libcamd.so.2"
-        LIBCCOLAMD_PATH="/lib/x86_64-linux-gnu/libccolamd.so.2"
-        LIBCOLAMD_PATH="/lib/x86_64-linux-gnu/libcolamd.so.2"
-        LIBMETIS_PATH="/lib/x86_64-linux-gnu/libmetis.so.5"
-        LIBLAPACK_PATH="/lib/x86_64-linux-gnu/liblapack.so.3"
-        LIBBLAS_PATH="/lib/x86_64-linux-gnu/libblas.so.3"
-        # Below libs are direct dependencies of libblas
-        LIBGFORTRAN_PATH="/lib/x86_64-linux-gnu/libgfortran.so.5"
-        LIBQUADMATH_PATH="/lib/x86_64-linux-gnu/libquadmath.so.0"
-    fi
-    MAYBE_LIB64=lib
-fi
-OS_SO_PATHS=($LIBGOMP_PATH $LIBNUMA_PATH\
-             $LIBELF_PATH $LIBTINFO_PATH\
-             $LIBDRM_PATH $LIBDRM_AMDGPU_PATH\
-             $LIBSUITESPARSE_CONFIG_PATH\
-             $LIBCHOLMOD_PATH $LIBAMD_PATH\
-             $LIBCAMD_PATH $LIBCCOLAMD_PATH\
-             $LIBCOLAMD_PATH $LIBSATLAS_PATH\
-             $LIBGFORTRAN_PATH $LIBQUADMATH_PATH\
-             $LIBMETIS_PATH $LIBLAPACK_PATH\
-             $LIBBLAS_PATH)
-OS_SO_FILES=()
-for lib in "${OS_SO_PATHS[@]}"
-do
-    file_name="${lib##*/}" # Substring removal of path to get filename
-    OS_SO_FILES[${#OS_SO_FILES[@]}]=$file_name # Append lib to array
-done
-
-# PyTorch-version specific
-# AOTriton dependency only for PyTorch >= 2.4
-if (( $(echo "${PYTORCH_VERSION} 2.4" | awk '{print ($1 >= $2)}') )); then
-    ROCM_SO_FILES+=("libaotriton_v2.so")
-fi
-
-# rocBLAS library files
-ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
-ROCBLAS_LIB_DST=lib/rocblas/library
-ARCH=$(echo $PYTORCH_ROCM_ARCH | sed 's/;/|/g') # Replace ; seperated arch list to bar for grep
-ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
-OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
-ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)
-
-# hipblaslt library files
-HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library
-HIPBLASLT_LIB_DST=lib/hipblaslt/library
-ARCH_SPECIFIC_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -E $ARCH)
-OTHER_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -v gfx)
-HIPBLASLT_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)
-
-# ROCm library files
-ROCM_SO_PATHS=()
-for lib in "${ROCM_SO_FILES[@]}"
-do
-    file_path=($(find $ROCM_HOME/lib/ -name "$lib")) # First search in lib
-    if [[ -z $file_path ]]; then
-        if [ -d "$ROCM_HOME/lib64/" ]; then
-            file_path=($(find $ROCM_HOME/lib64/ -name "$lib")) # Then search in lib64
-        fi
-    fi
-    if [[ -z $file_path ]]; then
-        file_path=($(find $ROCM_HOME/ -name "$lib")) # Then search in ROCM_HOME
-    fi
-    if [[ -z $file_path ]]; then
-        echo "Error: Library file $lib is not found." >&2
-        exit 1
-    fi
-    ROCM_SO_PATHS[${#ROCM_SO_PATHS[@]}]="$file_path" # Append lib to array
-done
-
-DEPS_LIST=(
-    ${ROCM_SO_PATHS[*]}
-    ${OS_SO_PATHS[*]}
-)
-
-DEPS_SONAME=(
-    ${ROCM_SO_FILES[*]}
-    ${OS_SO_FILES[*]}
-)
-
-DEPS_AUX_SRCLIST=(
-    "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}"
-    "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_SRC/}"
-    "/opt/amdgpu/share/libdrm/amdgpu.ids"
-)
-
-DEPS_AUX_DSTLIST=(
-    "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}"
-    "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_DST/}"
-    "share/libdrm/amdgpu.ids"
-)
-
-# MIOpen library files
-MIOPEN_SHARE_SRC=$ROCM_HOME/share/miopen/db
-MIOPEN_SHARE_DST=share/miopen/db
-MIOPEN_SHARE_FILES=($(ls $MIOPEN_SHARE_SRC | grep -E $ARCH))
-DEPS_AUX_SRCLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/})
-DEPS_AUX_DSTLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/})
-
-# RCCL library files
-RCCL_SHARE_SRC=$ROCM_HOME/share/rccl/msccl-algorithms
-RCCL_SHARE_DST=share/rccl/msccl-algorithms
-RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC))
-DEPS_AUX_SRCLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_SRC/})
-DEPS_AUX_DSTLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_DST/})
-
-echo "PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH}"
-
-SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
-if [[ -z "$BUILD_PYTHONLESS" ]]; then
-    BUILD_SCRIPT=build_common.sh
-else
-    BUILD_SCRIPT=build_libtorch.sh
-fi
-source $SCRIPTPATH/${BUILD_SCRIPT}
--- a/.ci/manywheel/test_wheel.sh
+++ b/.ci/manywheel/test_wheel.sh
@ -1,26 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-yum install -y wget git
-
-rm -rf /usr/local/cuda*
-
-# Install Anaconda
-if ! ls /py
-then
-    echo "Miniconda needs to be installed"
-    wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
-    bash ~/miniconda.sh -b -p /py
-else
-    echo "Miniconda is already installed"
-fi
-
-export PATH="/py/bin:$PATH"
-
-# Anaconda token
-if ls /remote/token
-then
-   source /remote/token
-fi
-
-conda install -y conda-build anaconda-client
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@ -49,8 +49,13 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
 fi

 # Enable LLVM dependency for TensorExpr testing
-export USE_LLVM=/opt/llvm
-export LLVM_DIR=/opt/llvm/lib/cmake/llvm
+if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
+  export USE_LLVM=/opt/rocm/llvm
+  export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
+else
+  export USE_LLVM=/opt/llvm
+  export LLVM_DIR=/opt/llvm/lib/cmake/llvm
+fi

 if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then
  # To build test_edge_op_registration
@ -178,7 +183,7 @@ fi
 # sccache will fail for CUDA builds if all cores are used for compiling
 # gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
 if [ -z "$MAX_JOBS" ]; then
-  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; } && which sccache > /dev/null; then
+  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
    export MAX_JOBS=$(($(nproc) - 1))
  fi
 fi
@ -203,6 +208,7 @@ if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
 fi

 if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
+  export LDSHARED="clang --shared"
  export USE_CUDA=0
  export USE_ASAN=1
  export UBSAN_FLAGS="-fno-sanitize-recover=all;-fno-sanitize=float-divide-by-zero;-fno-sanitize=float-cast-overflow"
@ -217,6 +223,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
    export USE_PRECOMPILED_HEADERS=1
 fi

+if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build*  ]]; then
+  export USE_GLOO_WITH_OPENSSL=ON
+fi
+
 if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
  export BUILD_STATIC_RUNTIME_BENCHMARK=ON
 fi
@ -227,7 +237,7 @@ fi

 # Do not change workspace permissions for ROCm CI jobs
 # as it can leave workspace with bad permissions for cancelled jobs
-if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
  # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
  WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
  cleanup_workspace() {
@ -273,11 +283,11 @@ else
    # set only when building other architectures
    # or building non-XLA tests.
    if [[ "$BUILD_ENVIRONMENT" != *rocm*  &&
-          "$BUILD_ENVIRONMENT" != *s390x*   &&
          "$BUILD_ENVIRONMENT" != *xla* ]]; then
      if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
-        # Install numpy-2.0.2 for builds which are backward compatible with 1.X
-        python -mpip install --pre numpy==2.0.2
+        # Install numpy-2.0 release candidate for builds
+        # Which should be backward compatible with Numpy-1.X
+        python -mpip install --pre numpy==2.0.0rc1
      fi

      WERROR=1 python setup.py clean
@ -336,11 +346,11 @@ else
    CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
    CUSTOM_OP_TEST="$PWD/test/custom_operator"
    python --version
-    SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
+    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"

    mkdir -p "$CUSTOM_OP_BUILD"
    pushd "$CUSTOM_OP_BUILD"
-    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
@ -350,10 +360,10 @@ else
    JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
    JIT_HOOK_TEST="$PWD/test/jit_hooks"
    python --version
-    SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
+    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
    mkdir -p "$JIT_HOOK_BUILD"
    pushd "$JIT_HOOK_BUILD"
-    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
@ -365,7 +375,7 @@ else
    python --version
    mkdir -p "$CUSTOM_BACKEND_BUILD"
    pushd "$CUSTOM_BACKEND_BUILD"
-    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
@ -398,6 +408,6 @@ fi

 # snadampal: skipping it till sccache support added for aarch64
 # https://github.com/pytorch/pytorch/issues/121559
-if [[ "$BUILD_ENVIRONMENT" != *aarch64* &&  "$BUILD_ENVIRONMENT" != *s390x* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
  print_sccache_stats
 fi
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@ -191,22 +191,9 @@ function install_torchrec_and_fbgemm() {
  pip_uninstall torchrec-nightly
  pip_uninstall fbgemm-gpu-nightly
  pip_install setuptools-git-versioning scikit-build pyre-extensions
-
-  # TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it
-  # seems to be an sccache-related issue
-  if [[ "$IS_A100_RUNNER" == "1" ]]; then
-    unset CMAKE_CUDA_COMPILER_LAUNCHER
-    sudo mv /opt/cache/bin /opt/cache/bin-backup
-  fi
-
  # See https://github.com/pytorch/pytorch/issues/106971
  CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
-
-  if [[ "$IS_A100_RUNNER" == "1" ]]; then
-    export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
-    sudo mv /opt/cache/bin-backup /opt/cache/bin
-  fi
 }

 function clone_pytorch_xla() {
--- a/.ci/pytorch/create_test_cert.py
+++ b/.ci/pytorch/create_test_cert.py
@ -1,4 +1,4 @@
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timedelta
 from tempfile import mkdtemp

 from cryptography import x509
@ -42,10 +42,11 @@ def create_cert(path, C, ST, L, O, key):
        .issuer_name(issuer)
        .public_key(key.public_key())
        .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.now(timezone.utc))
+        .not_valid_before(datetime.utcnow())
        .not_valid_after(
            # Our certificate will be valid for 10 days
-            datetime.now(timezone.utc) + timedelta(days=10)
+            datetime.utcnow()
+            + timedelta(days=10)
        )
        .add_extension(
            x509.BasicConstraints(ca=True, path_length=None),
@ -87,10 +88,11 @@ def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
        .issuer_name(ca_cert.subject)
        .public_key(csr_cert.public_key())
        .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.now(timezone.utc))
+        .not_valid_before(datetime.utcnow())
        .not_valid_after(
            # Our certificate will be valid for 10 days
-            datetime.now(timezone.utc) + timedelta(days=10)
+            datetime.utcnow()
+            + timedelta(days=10)
            # Sign our certificate with our private key
        )
        .sign(private_ca_key, hashes.SHA256())
--- a/.ci/pytorch/macos-test.sh
+++ b/.ci/pytorch/macos-test.sh
@ -9,13 +9,15 @@ if [[ -n "$CONDA_ENV" ]]; then
  export PATH="$CONDA_ENV/bin":$PATH
 fi

-# Test that OpenMP is enabled
-pushd test
-if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
-  echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
-  exit 1
+# Test that OpenMP is enabled for non-arm64 build
+if [[ ${BUILD_ENVIRONMENT} != *arm64* ]]; then
+  pushd test
+  if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
+    echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
+    exit 1
+  fi
+  popd
 fi
-popd

 setup_test_python() {
  # The CircleCI worker hostname doesn't resolve to an address.
@ -25,9 +27,8 @@ setup_test_python() {
  echo "Ninja version: $(ninja --version)"
  echo "Python version: $(which python) ($(python --version))"

-  # Set the limit on open file handles to 16384
-  # might help with intermittent compiler test failures
-  ulimit -n 16384
+  # Increase default limit on open file handles from 256 to 1024
+  ulimit -n 1024
 }

 test_python_all() {
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@ -49,16 +49,16 @@ NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}"
 export VALGRIND=ON
 # export TORCH_INDUCTOR_INSTALL_GXX=ON
 if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
-  # clang9 appears to miscompile code involving std::optional<c10::SymInt>,
+  # clang9 appears to miscompile code involving c10::optional<c10::SymInt>,
  # such that valgrind complains along these lines:
  #
  # Conditional jump or move depends on uninitialised value(s)
  #    at 0x40303A: ~optional_base (Optional.h:281)
  #    by 0x40303A: call (Dispatcher.h:448)
-  #    by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, std::optional<c10::SymInt>) (basic.cpp:10)
+  #    by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:10)
  #    by 0x403700: main (basic.cpp:16)
  #  Uninitialised value was created by a stack allocation
-  #    at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, std::optional<c10::SymInt>) (basic.cpp:6)
+  #    at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:6)
  #
  # The problem does not appear with gcc or newer versions of clang (we tested
  # clang14).  So we suppress valgrind testing for clang9 specifically.
@ -72,7 +72,7 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
  #
  # using namespace at;
  #
-  # Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, std::optional<c10::SymInt> storage_offset) {
+  # Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) {
  #   auto op = c10::Dispatcher::singleton()
  #       .findSchemaOrThrow(at::_ops::as_strided::name, at::_ops::as_strided::overload_name)
  #       .typed<at::_ops::as_strided::schema>();
@ -233,8 +233,8 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
    # it depends on a ton of dynamic libraries that most programs aren't gonna
    # have, and it applies to child processes.

-    LD_PRELOAD=$(clang --print-file-name=libclang_rt.asan-x86_64.so)
-    export LD_PRELOAD
+    # TODO: get rid of the hardcoded path
+    export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so
    # Disable valgrind for asan
    export VALGRIND=OFF

@ -369,27 +369,22 @@ test_inductor_aoti() {
  CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
 }

-test_inductor_cpp_wrapper() {
-  export TORCHINDUCTOR_CPP_WRAPPER=1
+test_inductor_cpp_wrapper_abi_compatible() {
+  export TORCHINDUCTOR_ABI_COMPATIBLE=1
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

-  python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
+  echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
+  # cpu stack allocation causes segfault and needs more investigation
+  PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
+  python test/run_test.py --include inductor/test_cuda_cpp_wrapper
+
+  TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
    --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
    --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
  python benchmarks/dynamo/check_accuracy.py \
    --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"
-
-  python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
-    --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
-  python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
-    --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
-  python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
-    --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
-  python benchmarks/dynamo/check_accuracy.py \
-    --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
-    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
 }

 # "Global" flags for inductor benchmarking controlled by TEST_CONFIG
@ -406,10 +401,10 @@ pr_time_benchmarks() {

  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"
-  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
+  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
  echo "benchmark results on current PR: "
-  cat  "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
-  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "$TEST_REPORTS_DIR/new_expected_results.csv"
+  cat  "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt"
+
 }

 if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
@ -517,7 +512,7 @@ test_perf_for_dashboard() {
              "${target_flag[@]}" --"$mode" --"$dtype" --export --disable-cudagraphs "$@" \
              --output "$TEST_REPORTS_DIR/${backend}_export_${suite}_${dtype}_${mode}_${device}_${target}.csv"
        fi
-        $TASKSET python "benchmarks/dynamo/$suite.py" \
+        TORCHINDUCTOR_ABI_COMPATIBLE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
@ -572,11 +567,18 @@ test_single_dynamo_benchmark() {
    test_perf_for_dashboard "$suite" \
      "${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}"
  else
+    if [[ "${TEST_CONFIG}" == *aot_inductor* && "${TEST_CONFIG}" != *cpu_aot_inductor* ]]; then
+      # Test AOTInductor with the ABI-compatible mode on CI
+      # This can be removed once the ABI-compatible mode becomes default.
+      # For CPU device, we perfer non ABI-compatible mode on CI when testing AOTInductor.
+      export TORCHINDUCTOR_ABI_COMPATIBLE=1
+    fi
+
    if [[ "${TEST_CONFIG}" == *_avx2* ]]; then
-      TEST_CONFIG=${TEST_CONFIG//_avx2/}
+      TEST_CONFIG=${TEST_CONFIG::-5}
    fi
    if [[ "${TEST_CONFIG}" == *_avx512* ]]; then
-      TEST_CONFIG=${TEST_CONFIG//_avx512/}
+      TEST_CONFIG=${TEST_CONFIG::-7}
    fi
    python "benchmarks/dynamo/$suite.py" \
      --ci --accuracy --timing --explain \
@ -594,9 +596,6 @@ test_single_dynamo_benchmark() {

 test_inductor_micro_benchmark() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
-  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
-    test_inductor_set_cpu_affinity
-  fi
  python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
 }

@ -605,11 +604,6 @@ test_inductor_halide() {
  assert_git_not_dirty
 }

-test_inductor_triton_cpu() {
-  python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
-  assert_git_not_dirty
-}
-
 test_dynamo_benchmark() {
  # Usage: test_dynamo_benchmark huggingface 0
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
@ -647,12 +641,32 @@ test_inductor_torchbench_smoketest_perf() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

+  # Test some models in the cpp wrapper mode
+  TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
+    --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
+  TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
+    --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
+  TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
+    --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
+  python benchmarks/dynamo/check_accuracy.py \
+    --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
+    --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
+
  python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
    --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
    --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
  # The threshold value needs to be actively maintained to make this check useful
  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4

+  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
+    --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
+  # The threshold value needs to be actively maintained to make this check useful
+  # The perf number of nanogpt seems not very stable, e.g.
+  # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
+  # and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
+  # we switch to use some other model.
+  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
+
  # Check memory compression ratio for a few models
  for test in hf_Albert timm_vision_transformer; do
    python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
@ -696,10 +710,6 @@ test_inductor_set_cpu_affinity(){
    export KMP_BLOCKTIME=1
  fi
  cores=$(test_inductor_get_core_number)
-  # Set number of cores to 16 on Aarch64 for performance runs.
-  if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then
-    cores=16
-  fi
  export OMP_NUM_THREADS=$cores
  end_core=$((cores-1))
  export TASKSET="taskset -c 0-$end_core"
@ -736,9 +746,19 @@ test_inductor_torchbench_cpu_smoketest_perf(){
    fi
    cat "$output_name"
    # The threshold value needs to be actively maintained to make this check useful.
-    # Allow 1% variance for CPU perf to accommodate perf fluctuation
-    python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target" -s 0.99
+    python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target"
  done
+
+  # Add a few ABI-compatible accuracy tests for CPU. These can be removed once we turn on ABI-compatible as default.
+  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
+    --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only adv_inception_v3 \
+    --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
+  TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
+    --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only beit_base_patch16_224 \
+    --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
+  python benchmarks/dynamo/check_accuracy.py \
+    --actual "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv" \
+    --expected "benchmarks/dynamo/ci_expected_accuracy/aot_inductor_timm_inference.csv"
 }

 test_torchbench_gcp_smoketest(){
@ -1360,16 +1380,14 @@ test_executorch() {
  assert_git_not_dirty
 }

-test_linux_aarch64() {
+test_linux_aarch64(){
  python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
-        test_transformers test_multiprocessing test_numpy_interop \
-        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
+       test_transformers test_multiprocessing test_numpy_interop --verbose

  # Dynamo tests
  python test/run_test.py --include dynamo/test_compile dynamo/test_backends dynamo/test_comptime dynamo/test_config \
       dynamo/test_functions dynamo/test_fx_passes_pre_grad dynamo/test_interop dynamo/test_model_output dynamo/test_modules \
-       dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles \
-       --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
+       dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles --verbose

  # Inductor tests
  python test/run_test.py --include inductor/test_torchinductor inductor/test_benchmark_fusion inductor/test_codecache \
@ -1379,8 +1397,7 @@ test_linux_aarch64() {
       inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \
       inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
       inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
-       inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \
-       --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
+       inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes --verbose
 }

 if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
@ -1413,8 +1430,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
  test_inductor_distributed
 elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
  test_inductor_halide
-elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then
-  test_inductor_triton_cpu
 elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
  test_inductor_micro_benchmark
 elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
@ -1431,13 +1446,14 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
  else
    install_torchaudio cuda
  fi
+  install_torchtext
  install_torchvision
  TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install git+https://github.com/pytorch/ao.git
  id=$((SHARD_NUMBER-1))
  # https://github.com/opencv/opencv-python/issues/885
  pip_install opencv-python==4.8.0.74
  if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
-    checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
+    checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
  elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then
    checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \
@ -1456,16 +1472,16 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
    fi
    PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
  fi
-elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
-  install_torchaudio cuda
+elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then
  install_torchvision
-  checkout_install_torchbench hf_T5 llama moco
-  PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper
+  test_inductor_cpp_wrapper_abi_compatible
 elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
  install_torchvision
  test_inductor_shard "${SHARD_NUMBER}"
  if [[ "${SHARD_NUMBER}" == 1 ]]; then
-    if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then
+    if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.8-gcc11-build ]]; then
+      # Temporarily skip test_inductor_aoti due to https://github.com/pytorch/pytorch/issues/130311
+      test_inductor_aoti
      test_inductor_distributed
    fi
  fi
--- a/.ci/pytorch/win-build.sh
+++ b/.ci/pytorch/win-build.sh
@ -26,7 +26,7 @@ fi
 export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers

 set +ex
-grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h  --exclude=pythoncapi_compat.h --exclude=eval_frame.c torch/
+grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
 PYLONG_API_CHECK=$?
 if [[ $PYLONG_API_CHECK == 0 ]]; then
  echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
--- a/.ci/pytorch/win-test-helpers/build_pytorch.bat
+++ b/.ci/pytorch/win-test-helpers/build_pytorch.bat
@ -24,12 +24,6 @@ call %INSTALLER_DIR%\install_sccache.bat
 if errorlevel 1 goto fail
 if not errorlevel 0 goto fail

-if "%USE_XPU%"=="1" (
-  :: Install xpu support packages
-  call %INSTALLER_DIR%\install_xpu.bat
-  if errorlevel 1 exit /b 1
-)
-
 :: Miniconda has been installed as part of the Windows AMI with all the dependencies.
 :: We just need to activate it here
 call %INSTALLER_DIR%\activate_miniconda3.bat
@ -49,16 +43,6 @@ if "%VC_VERSION%" == "" (
 )
 if errorlevel 1 goto fail
 if not errorlevel 0 goto fail
-
-if "%USE_XPU%"=="1" (
-  :: Activate xpu environment - VS env is required for xpu
-  call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-  if errorlevel 1 exit /b 1
-  :: Reduce build time. Only have MTL self-hosted runner now
-  SET TORCH_XPU_ARCH_LIST=xe-lpg
-  SET USE_KINETO=0
-)
-
@echo on
 popd

--- a/.ci/pytorch/win-test-helpers/installation-helpers/install_xpu.bat
+++ b/.ci/pytorch/win-test-helpers/installation-helpers/install_xpu.bat
@ -1,91 +0,0 @@
-@echo on
-REM Description: Install Intel Support Packages on Windows
-REM BKM reference: https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html
-
-set XPU_INSTALL_MODE=%~1
-if "%XPU_INSTALL_MODE%"=="" goto xpu_bundle_install_start
-if "%XPU_INSTALL_MODE%"=="bundle" goto xpu_bundle_install_start
-if "%XPU_INSTALL_MODE%"=="driver" goto xpu_driver_install_start
-if "%XPU_INSTALL_MODE%"=="all" goto xpu_driver_install_start
-
-:arg_error
-
-echo Illegal XPU installation mode. The value can be "bundle"/"driver"/"all"
-echo If keep the value as space, will use default "bundle" mode
-exit /b 1
-
-:xpu_driver_install_start
-:: TODO Need more testing for driver installation
-set XPU_DRIVER_LINK=https://downloadmirror.intel.com/830975/gfx_win_101.5972.exe
-curl -o xpu_driver.exe --retry 3 --retry-all-errors -k %XPU_DRIVER_LINK%
-echo "XPU Driver installing..."
-start /wait "Intel XPU Driver Installer" "xpu_driver.exe"
-if errorlevel 1 exit /b 1
-del xpu_driver.exe
-if "%XPU_INSTALL_MODE%"=="driver" goto xpu_install_end
-
-:xpu_bundle_install_start
-
-set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
-set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-for-pytorch-gpu-dev_p_0.5.3.37_offline.exe
-set XPU_PTI_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-pti-dev_p_0.9.0.37_offline.exe
-set XPU_BUNDLE_VERSION=0.5.3+31
-set XPU_PTI_VERSION=0.9.0+36
-set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.intel-for-pytorch-gpu-dev.product
-set XPU_PTI_PRODUCT_NAME=intel.oneapi.win.intel-pti-dev.product
-set XPU_BUNDLE_INSTALLED=0
-set XPU_PTI_INSTALLED=0
-set XPU_BUNDLE_UNINSTALL=0
-set XPU_PTI_UNINSTALL=0
-
-:: Check if XPU bundle is target version or already installed
-if exist "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" goto xpu_bundle_ver_check
-goto xpu_bundle_install
-
-:xpu_bundle_ver_check
-
-"%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --list-products > xpu_bundle_installed_ver.log
-
-for /f "tokens=1,2" %%a in (xpu_bundle_installed_ver.log) do (
-    if "%%a"=="%XPU_BUNDLE_PRODUCT_NAME%" (
-        echo %%a Installed Version: %%b
-        set XPU_BUNDLE_INSTALLED=1
-        if not "%XPU_BUNDLE_VERSION%"=="%%b" (
-            start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %XPU_BUNDLE_PRODUCT_NAME% --product-ver %%b --log-dir uninstall_bundle
-            set XPU_BUNDLE_UNINSTALL=1
-        )
-    )
-    if "%%a"=="%XPU_PTI_PRODUCT_NAME%" (
-        echo %%a Installed Version: %%b
-        set XPU_PTI_INSTALLED=1
-        if not "%XPU_PTI_VERSION%"=="%%b" (
-            start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %XPU_PTI_PRODUCT_NAME% --product-ver %%b --log-dir uninstall_bundle
-            set XPU_PTI_UNINSTALL=1
-        )
-    )
-)
-if errorlevel 1 exit /b 1
-if exist xpu_bundle_installed_ver.log del xpu_bundle_installed_ver.log
-if "%XPU_BUNDLE_INSTALLED%"=="0" goto xpu_bundle_install
-if "%XPU_BUNDLE_UNINSTALL%"=="1" goto xpu_bundle_install
-if "%XPU_PTI_INSTALLED%"=="0" goto xpu_pti_install
-if "%XPU_PTI_UNINSTALL%"=="1" goto xpu_pti_install
-goto xpu_install_end
-
-:xpu_bundle_install
-
-curl -o xpu_bundle.exe --retry 3 --retry-all-errors -k %XPU_BUNDLE_URL%
-echo "XPU Bundle installing..."
-start /wait "Intel Pytorch Bundle Installer" "xpu_bundle.exe" --action=install --eula=accept --silent --log-dir install_bundle
-if errorlevel 1 exit /b 1
-del xpu_bundle.exe
-
-:xpu_pti_install
-
-curl -o xpu_pti.exe --retry 3 --retry-all-errors -k %XPU_PTI_URL%
-echo "XPU PTI installing..."
-start /wait "Intel PTI Installer" "xpu_pti.exe" --action=install --eula=accept --silent --log-dir install_bundle
-if errorlevel 1 exit /b 1
-del xpu_pti.exe
-
-:xpu_install_end
--- a/.ci/pytorch/win-test.sh
+++ b/.ci/pytorch/win-test.sh
@ -40,15 +40,6 @@ python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==
 # Install Z3 optional dependency for Windows builds.
 python -m pip install z3-solver==4.12.2.0

-# Install tlparse for test\dynamo\test_structured_trace.py UTs.
-python -m pip install tlparse==0.3.25
-
-# Install parameterized
-python -m pip install parameterized==0.8.1
-
-# Install pulp for testing ilps under torch\distributed\_tools
-python -m pip install pulp==2.9.0
-
 run_tests() {
    # Run nvidia-smi if available
    for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
--- a/.circleci/scripts/binary_linux_test.sh
+++ b/.circleci/scripts/binary_linux_test.sh
@ -27,11 +27,12 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
  source activate testenv >/dev/null
 elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
  python_path="/opt/python/cp\$python_nodot-cp\${python_nodot}"
-  if [[ "\$python_nodot" = *t ]]; then
-    python_digits="\$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
-    python_path="/opt/python/cp\$python_digits-cp\${python_digits}t"
+  # Prior to Python 3.8 paths were suffixed with an 'm'
+  if [[ -d  "\${python_path}/bin" ]]; then
+    export PATH="\${python_path}/bin:\$PATH"
+  elif [[ -d "\${python_path}m/bin" ]]; then
+    export PATH="\${python_path}m/bin:\$PATH"
  fi
-  export PATH="\${python_path}/bin:\$PATH"
 fi

 EXTRA_CONDA_FLAGS=""
@ -118,11 +119,6 @@ fi
 # Test the package
 /builder/check_binary.sh

-if [[ "\$GPU_ARCH_TYPE" != *s390x* && "\$GPU_ARCH_TYPE" != *xpu* && "\$GPU_ARCH_TYPE" != *rocm*  && "$PACKAGE_TYPE" != libtorch ]]; then
-  # Exclude s390, xpu, rocm and libtorch builds from smoke testing
-  python /builder/test/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled
-fi
-
 # Clean temp files
 cd /builder && git clean -ffdx

--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@ -90,7 +90,7 @@ fi
 if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == "Linux" ]]; then
    TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
    if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
-        TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt)
+        TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-rocm.txt)
        TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
    fi
    if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
--- a/.circleci/scripts/binary_windows_build.sh
+++ b/.circleci/scripts/binary_windows_build.sh
@ -10,11 +10,6 @@ export SCCACHE_BUCKET=ossci-compiler-cache
 export SCCACHE_IGNORE_SERVER_IO_ERROR=1
 export VC_YEAR=2019

-if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
-    export VC_YEAR=2022
-    export USE_SCCACHE=0
-fi
-
 echo "Free space on filesystem before build:"
 df -h

--- a/.circleci/scripts/binary_windows_test.sh
+++ b/.circleci/scripts/binary_windows_test.sh
@ -6,10 +6,6 @@ source "${BINARY_ENV_FILE:-/c/w/env}"
 export CUDA_VERSION="${DESIRED_CUDA/cu/}"
 export VC_YEAR=2019

-if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
-    export VC_YEAR=2022
-fi
-
 pushd "$BUILDER_ROOT"

 ./windows/internal/smoke_test.bat
--- a/.clang-format
+++ b/.clang-format
@ -44,9 +44,7 @@ ContinuationIndentWidth: 4
 Cpp11BracedListStyle: true
 DerivePointerAlignment: false
 DisableFormat:   false
-ForEachMacros:
-  - FOR_EACH_RANGE
-  - FOR_EACH
+ForEachMacros:   [ FOR_EACH_RANGE, FOR_EACH, ]
 IncludeCategories:
  - Regex:           '^<.*\.h(pp)?>'
    Priority:        1
@ -60,24 +58,6 @@ IndentWrappedFunctionNames: false
 KeepEmptyLinesAtTheStartOfBlocks: false
 MacroBlockBegin: ''
 MacroBlockEnd:   ''
-Macros:
-  - >-
-    PyObject_HEAD_INIT(type)={
-        /* this is not exactly match with PyObject_HEAD_INIT in Python source code
-         * but it is enough for clang-format */
-        { 0xFFFFFFFF },
-        (type)
-    },
-  - >-
-    PyVarObject_HEAD_INIT(type, size)={
-        {
-            /* manually expand PyObject_HEAD_INIT(type) above
-             * because clang-format do not support recursive expansion */
-            { 0xFFFFFFFF },
-            (type)
-        },
-        (size)
-    },
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: None
 PenaltyBreakBeforeFirstCallParameter: 1
@ -99,11 +79,7 @@ SpacesInContainerLiterals: true
 SpacesInCStyleCastParentheses: false
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
-Standard:        c++17
-StatementMacros:
-  - PyObject_HEAD
-  - PyObject_VAR_HEAD
-  - PyException_HEAD
+Standard:        Cpp11
 TabWidth:        8
 UseTab:          Never
 ---
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -0,0 +1,38 @@
+If you have a question or would like help and support, please ask at our
+[forums](https://discuss.pytorch.org/).
+
+If you are submitting a feature request, please preface the title with [feature request].
+If you are submitting a bug report, please fill in the following details.
+
+## Issue description
+
+Provide a short description.
+
+## Code example
+
+Please try to provide a minimal example to repro the bug.
+Error messages and stack traces are also helpful.
+
+## System Info
+Please copy and paste the output from our
+[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py)
+(or fill out the checklist below manually).
+
+You can get the script and run it with:
+```
+wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py
+# For security purposes, please check the contents of collect_env.py before running it.
+python collect_env.py
+```
+
+- PyTorch or Caffe2:
+- How you installed PyTorch (conda, pip, source):
+- Build command you used (if compiling from source):
+- OS:
+- PyTorch version:
+- Python version:
+- CUDA/cuDNN version:
+- GPU models and configuration:
+- GCC version (if compiling from source):
+- CMake version:
+- Versions of any other relevant libraries:
--- a/.github/ISSUE_TEMPLATE/ci-sev.md
+++ b/.github/ISSUE_TEMPLATE/ci-sev.md
@ -5,8 +5,7 @@ about: Tracking incidents for PyTorch's CI infra.

 > NOTE: Remember to label this issue with "`ci: sev`"

- <!-- uncomment the below line if you don't want this SEV to block merges -->
- <!--  **MERGE BLOCKING** -->
+**MERGE BLOCKING** <!-- remove this line if you don't want this SEV to block merges -->

 ## Current Status
 *Status could be: preemptive, ongoing, mitigated, closed. Also tell people if they need to take action to fix it (i.e. rebase)*.
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -3,6 +3,8 @@ self-hosted-runner:
    # GitHub hosted x86 Linux runners
    - linux.20_04.4x
    - linux.20_04.16x
+    # Repo-specific LF hosted ARC runners
+    - linux.large.arc
    # Organization-wide AWS Linux Runners
    - linux.large
    - linux.2xlarge
@ -32,6 +34,30 @@ self-hosted-runner:
    - lf.linux.8xlarge.nvidia.gpu
    - lf.linux.16xlarge.nvidia.gpu
    - lf.linux.g5.4xlarge.nvidia.gpu
+    # Organization-wide AWS Linux Runners with new Amazon 2023 AMI
+    - amz2023.linux.large
+    - amz2023.linux.2xlarge
+    - amz2023.linux.4xlarge
+    - amz2023.linux.12xlarge
+    - amz2023.linux.24xlarge
+    - amz2023.linux.arm64.2xlarge
+    - amz2023.linux.arm64.m7g.4xlarge
+    - amz2023.linux.arm64.m7g.4xlarge.ephemeral
+    - amz2023.linux.4xlarge.nvidia.gpu
+    - amz2023.linux.8xlarge.nvidia.gpu
+    - amz2023.linux.16xlarge.nvidia.gpu
+    - amz2023.linux.g5.4xlarge.nvidia.gpu
+    # Pytorch/pytorch AWS Linux Runners with the new Amazon 2023 AMI on Linux Foundation account
+    - amz2023.lf.linux.large
+    - amz2023.lf.linux.2xlarge
+    - amz2023.lf.linux.4xlarge
+    - amz2023.lf.linux.12xlarge
+    - amz2023.lf.linux.24xlarge
+    - amz2023.lf.linux.arm64.2xlarge
+    - amz2023.lf.linux.4xlarge.nvidia.gpu
+    - amz2023.lf.linux.8xlarge.nvidia.gpu
+    - amz2023.lf.linux.16xlarge.nvidia.gpu
+    - amz2023.lf.linux.g5.4xlarge.nvidia.gpu
    # Repo-specific IBM hosted S390x runner
    - linux.s390x
    # Organization wide AWS Windows runners
--- a/.github/actions/checkout-pytorch/action.yml
+++ b/.github/actions/checkout-pytorch/action.yml
@ -18,14 +18,8 @@ inputs:
 runs:
  using: composite
  steps:
-    - name: Check if in a container runner
-      shell: bash
-      id: check_container_runner
-      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
-
    - name: Clean workspace
      shell: bash
-      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
      env:
        NO_SUDO: ${{ inputs.no-sudo }}
      run: |
--- a/.github/actions/linux-test/action.yml
+++ b/.github/actions/linux-test/action.yml
@ -85,25 +85,15 @@ runs:
      with:
        docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

-    - name: Check if in a container runner
+    - name: Check if in a ARC runner
      shell: bash
-      id: check_container_runner
-      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
+      id: check_arc_runner
+      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"

    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
      id: install-nvidia-driver
      uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
-
-    - name: Setup GPU_FLAG for docker run
-      id: setup-gpu-flag
-      run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
-      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
-
-    - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
-      id: setup-sscache-port-flag
-      run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
-      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
+      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}

    - name: Lock NVIDIA A100 40GB Frequency
      shell: bash
@ -111,7 +101,7 @@ runs:
        sudo nvidia-smi -pm 1
        sudo nvidia-smi -ac 1215,1410
        nvidia-smi
-      if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
+      if: contains(matrix.runner, 'a100')

    - name: Start monitoring script
      id: monitor-script
@ -182,7 +172,6 @@ runs:
        NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
        TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
        SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
-        SCCACHE_REGION: us-east-1
        SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
        SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
        DOCKER_IMAGE: ${{ inputs.docker-image }}
@ -192,9 +181,6 @@ runs:
        PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
        DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
        HUGGING_FACE_HUB_TOKEN: ${{ inputs.HUGGING_FACE_HUB_TOKEN }}
-        SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-        IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}
-
      shell: bash
      run: |
        set -x
@ -213,7 +199,6 @@ runs:
        # shellcheck disable=SC2086,SC2090
        container_name=$(docker run \
          ${GPU_FLAG:-} \
-          ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
          -e BUILD_ENVIRONMENT \
          -e PR_NUMBER \
          -e GITHUB_ACTIONS \
@ -242,7 +227,6 @@ runs:
          -e PR_LABELS \
          -e MAX_JOBS="$(nproc --ignore=2)" \
          -e SCCACHE_BUCKET \
-          -e SCCACHE_REGION \
          -e SCCACHE_S3_KEY_PREFIX \
          -e XLA_CUDA \
          -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@ -250,9 +234,7 @@ runs:
          -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
          -e SKIP_SCCACHE_INITIALIZATION=1 \
          -e HUGGING_FACE_HUB_TOKEN \
-          -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
          -e DASHBOARD_TAG \
-          -e IS_A100_RUNNER \
          --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
          --security-opt seccomp=unconfined \
          --cap-add=SYS_PTRACE \
@ -323,7 +305,7 @@ runs:

    - name: Teardown Linux
      uses: pytorch/test-infra/.github/actions/teardown-linux@main
-      if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
+      if: always()

    # NB: We are currently having an intermittent GPU-related issue on G5 runners with
    # A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does
--- a/.github/actions/setup-linux/action.yml
+++ b/.github/actions/setup-linux/action.yml
@ -28,14 +28,14 @@ runs:
        echo "instance-type: $(get_ec2_metadata instance-type)"
        echo "system info $(uname -a)"

-    - name: Check if in a container runner
+    - name: Check if in a ARC runner
      shell: bash
-      id: check_container_runner
-      run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
+      id: check_arc_runner
+      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)"  >> $GITHUB_OUTPUT

    - name: Start docker if docker deamon is not running
      shell: bash
-      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
+      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
      run: |
        if systemctl is-active --quiet docker; then
            echo "Docker daemon is running...";
@ -73,7 +73,7 @@ runs:
        env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"

    - name: Kill any existing containers, clean up images
-      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
+      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
      shell: bash
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
@ -116,7 +116,7 @@ runs:
    - name: Check that the docker daemon is running
      shell: bash
      continue-on-error: true
-      if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
+      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'true' }}
      run: |
        set +x

--- a/.github/ci_commit_pins/audio.txt
+++ b/.github/ci_commit_pins/audio.txt
@ -1 +1 @@
-3f0569939c4369bec943fc27d1c9d8dfbc828c26
+97ed7b36b7a741253d4e41e4da3c901d83294503
--- a/.github/label_to_label.yml
+++ b/.github/label_to_label.yml
@ -31,10 +31,6 @@
  - "module: flex attention"
  then:
  - "module: higher order operators"
- any:
-  - "module: aotinductor"
-  then:
-  - "oncall: export"
 - any:
  - "module: dynamo"
  - "module: pt2-dispatcher"
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@ -98,9 +98,3 @@
 "module: distributed_checkpoint":
 - torch/distributed/checkpoint/**
 - test/distributed/checkpoint/**
-
-"module: compiled autograd":
- torch/csrc/dynamo/python_compiled_autograd.cpp
- torch/csrc/dynamo/compiled_autograd.h
- torch/_dynamo/compiled_autograd.py
- torch/inductor/test_compiled_autograd.py
--- a/.github/lf-canary-scale-config.yml
+++ b/.github/lf-canary-scale-config.yml
@ -7,14 +7,10 @@
 #   runners. Runners listed here will be available as self hosted
 #   runners, configuration is directly pulled from the main branch.
 #
+# NOTE (Apr, 5, 2021): Linux runners are currently all an amazonlinux2
 #
-# NOTES:
-#  - Linux runners are by default non-ephemeral to reduce the amount of CreateInstaces calls
-#    to avoid RequestLimitExceeded issues
-#  - When updating this file, run the following command to validate the YAML and to generate
-#    corresponding versions of scale-config for the pytorch/pytorch repo and merge the
-#    pytorch/pytorch changes before merging these changes.
-#    `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path_to_test-infra_root] --pytorch-repo-root [path_to_pytorch_root]``
+# NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the amount of CreateInstaces calls
+#                     to avoid RequestLimitExceeded issues
 #
 # TODO: Add some documentation on how the auto-scaling works
 #
@ -33,38 +29,60 @@ runner_types:
    disk_size: 200
    instance_type: c5.12xlarge
    is_ephemeral: false
-    max_available: 2000
+    max_available: 1000
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.10xlarge.avx2:
    disk_size: 200
    instance_type: m4.10xlarge
    is_ephemeral: false
    max_available: 450
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.24xl.spr-metal:
    disk_size: 200
    instance_type: c7i.metal-24xl
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.16xlarge.spr:
    disk_size: 200
    instance_type: c7i.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.9xlarge.ephemeral:
    disk_size: 200
    instance_type: c5.9xlarge
    is_ephemeral: true
    max_available: 50
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
      am2:
        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.12xlarge.ephemeral:
@ -73,140 +91,240 @@ runner_types:
    is_ephemeral: true
    max_available: 300
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.16xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.24xlarge:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: false
    max_available: 500
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.24xlarge.ephemeral:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.2xlarge:
    disk_size: 150
    instance_type: c5.2xlarge
    is_ephemeral: false
    max_available: 3120
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.4xlarge:
    disk_size: 150
    instance_type: c5.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.8xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.8xlarge
    is_ephemeral: false
    max_available: 400
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.g4dn.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.12xlarge
    is_ephemeral: false
    max_available: 250
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.g4dn.metal.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.metal
    is_ephemeral: false
    max_available: 300
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.g5.48xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.48xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.g5.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.12xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.g5.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.4xlarge
    is_ephemeral: false
    max_available: 2400
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.g6.4xlarge.experimental.nvidia.gpu:
    disk_size: 150
    instance_type: g6.4xlarge
    is_ephemeral: false
    max_available: 50
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.large:
    max_available: 1200
    disk_size: 15
    instance_type: c5.large
    is_ephemeral: false
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.arm64.2xlarge:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.c.linux.arm64.m7g.4xlarge:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.c.linux.arm64.2xlarge.ephemeral:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.c.linux.arm64.m7g.4xlarge.ephemeral:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.c.linux.arm64.m7g.metal:
    disk_size: 256
    instance_type: m7g.metal
    is_ephemeral: false
    max_available: 100
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.c.windows.g4dn.xlarge:
    disk_size: 256
    instance_type: g4dn.xlarge
@ -241,7 +359,7 @@ runner_types:
    disk_size: 256
    instance_type: p3.2xlarge
    is_ephemeral: false
-    max_available: 300
+    max_available: 150
    os: windows
  lf.c.windows.g5.4xlarge.nvidia.gpu:
    disk_size: 256
--- a/.github/lf-scale-config.yml
+++ b/.github/lf-scale-config.yml
@ -7,14 +7,10 @@
 #   runners. Runners listed here will be available as self hosted
 #   runners, configuration is directly pulled from the main branch.
 #
+# NOTE (Apr, 5, 2021): Linux runners are currently all an amazonlinux2
 #
-# NOTES:
-#  - Linux runners are by default non-ephemeral to reduce the amount of CreateInstaces calls
-#    to avoid RequestLimitExceeded issues
-#  - When updating this file, run the following command to validate the YAML and to generate
-#    corresponding versions of scale-config for the pytorch/pytorch repo and merge the
-#    pytorch/pytorch changes before merging these changes.
-#    `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path_to_test-infra_root] --pytorch-repo-root [path_to_pytorch_root]``
+# NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the amount of CreateInstaces calls
+#                     to avoid RequestLimitExceeded issues
 #
 # TODO: Add some documentation on how the auto-scaling works
 #
@ -33,38 +29,60 @@ runner_types:
    disk_size: 200
    instance_type: c5.12xlarge
    is_ephemeral: false
-    max_available: 2000
+    max_available: 1000
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.10xlarge.avx2:
    disk_size: 200
    instance_type: m4.10xlarge
    is_ephemeral: false
    max_available: 450
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.24xl.spr-metal:
    disk_size: 200
    instance_type: c7i.metal-24xl
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.16xlarge.spr:
    disk_size: 200
    instance_type: c7i.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.9xlarge.ephemeral:
    disk_size: 200
    instance_type: c5.9xlarge
    is_ephemeral: true
    max_available: 50
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
      am2:
        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.12xlarge.ephemeral:
@ -73,140 +91,240 @@ runner_types:
    is_ephemeral: true
    max_available: 300
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.16xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.24xlarge:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: false
    max_available: 500
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.24xlarge.ephemeral:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.2xlarge:
    disk_size: 150
    instance_type: c5.2xlarge
    is_ephemeral: false
    max_available: 3120
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.4xlarge:
    disk_size: 150
    instance_type: c5.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.8xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.8xlarge
    is_ephemeral: false
    max_available: 400
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.g4dn.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.12xlarge
    is_ephemeral: false
    max_available: 250
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.g4dn.metal.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.metal
    is_ephemeral: false
    max_available: 300
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.g5.48xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.48xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.g5.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.12xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.g5.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.4xlarge
    is_ephemeral: false
    max_available: 2400
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.g6.4xlarge.experimental.nvidia.gpu:
    disk_size: 150
    instance_type: g6.4xlarge
    is_ephemeral: false
    max_available: 50
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.large:
    max_available: 1200
    disk_size: 15
    instance_type: c5.large
    is_ephemeral: false
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.arm64.2xlarge:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.linux.arm64.m7g.4xlarge:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.linux.arm64.2xlarge.ephemeral:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.linux.arm64.m7g.4xlarge.ephemeral:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.linux.arm64.m7g.metal:
    disk_size: 256
    instance_type: m7g.metal
    is_ephemeral: false
    max_available: 100
    os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
  lf.windows.g4dn.xlarge:
    disk_size: 256
    instance_type: g4dn.xlarge
@ -241,7 +359,7 @@ runner_types:
    disk_size: 256
    instance_type: p3.2xlarge
    is_ephemeral: false
-    max_available: 300
+    max_available: 150
    os: windows
  lf.windows.g5.4xlarge.nvidia.gpu:
    disk_size: 256
--- a/.github/merge_rules.yaml
+++ b/.github/merge_rules.yaml
@ -86,18 +86,6 @@
  - pull
  - inductor

- name: OSS CI / pytorchbot / slow tests
-  patterns:
-  - test/slow_tests.json
-  approved_by:
-  - pytorchbot
-  ignore_flaky_failures: false
-  mandatory_checks_name:
-  - EasyCLA
-  - Lint
-  - pull
-  - slow
-
 - name: OSS CI /pytorchbot / Executorch
  patterns:
  - .ci/docker/ci_commit_pins/executorch.txt
@ -119,8 +107,8 @@
  mandatory_checks_name:
  - EasyCLA
  - Lint
-  - pull / linux-focal-py3_9-clang9-xla / build
-  - pull / linux-focal-py3_9-clang9-xla / test (xla, 1, 1, linux.12xlarge)
+  - pull / linux-focal-py3_8-clang9-xla / build
+  - pull / linux-focal-py3_8-clang9-xla / test (xla, 1, 1, linux.12xlarge)

 - name: Documentation
  patterns:
@ -544,7 +532,6 @@
  - anijain2305
  - bdhirsh
  - zou3519
-  - isuruf
  mandatory_checks_name:
  - EasyCLA
  - Lint
--- a/.github/pytorch-probot.yml
+++ b/.github/pytorch-probot.yml
@ -9,20 +9,17 @@ ciflow_push_tags:
 - ciflow/inductor-rocm
 - ciflow/inductor-perf-compare
 - ciflow/inductor-micro-benchmark
- ciflow/inductor-micro-benchmark-cpu-x86
 - ciflow/inductor-cu124
 - ciflow/linux-aarch64
 - ciflow/mps
 - ciflow/nightly
 - ciflow/periodic
 - ciflow/rocm
- ciflow/s390
 - ciflow/slow
 - ciflow/trunk
 - ciflow/unstable
 - ciflow/xpu
 - ciflow/torchbench
- ciflow/autoformat
 retryable_workflows:
 - pull
 - trunk
--- a/.github/requirements/pip-requirements-iOS.txt
+++ b/.github/requirements/pip-requirements-iOS.txt
@ -1,4 +1,4 @@
 # iOS simulator requirements
 coremltools==5.0b5
 protobuf==3.20.2
-optree==0.13.0
+optree==0.12.1
--- a/.github/requirements/pip-requirements-macOS.txt
+++ b/.github/requirements/pip-requirements-macOS.txt
@ -1,7 +1,6 @@
 boto3==1.19.12
 hypothesis==6.56.4
-expecttest==0.2.1
-fbscribelogger==0.1.6
+expecttest==0.1.6
 librosa>=0.6.2
 mpmath==1.3.0
 networkx==2.8.7
@ -27,8 +26,7 @@ pytest-cpp==2.3.0
 rockset==1.0.3
 z3-solver==4.12.2.0
 tensorboard==2.13.0
-optree==0.13.0
+optree==0.12.1
 # NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
 # which the stringify metadata is wrong when escaping double quote
 protobuf==3.20.2
-parameterized==0.8.1
--- a/.github/scripts/build_triton_wheel.py
+++ b/.github/scripts/build_triton_wheel.py
@ -15,7 +15,9 @@ REPO_DIR = SCRIPT_DIR.parent.parent

 def read_triton_pin(device: str = "cuda") -> str:
    triton_file = "triton.txt"
-    if device == "xpu":
+    if device == "rocm":
+        triton_file = "triton-rocm.txt"
+    elif device == "xpu":
        triton_file = "triton-xpu.txt"
    with open(REPO_DIR / ".ci" / "docker" / "ci_commit_pins" / triton_file) as f:
        return f.read().strip()
--- a/.github/scripts/check_labels.py
+++ b/.github/scripts/check_labels.py
@ -27,12 +27,6 @@ def parse_args() -> Any:

    parser = ArgumentParser("Check PR labels")
    parser.add_argument("pr_num", type=int)
-    # add a flag to return a non-zero exit code if the PR does not have the required labels
-    parser.add_argument(
-        "--exit-non-zero",
-        action="store_true",
-        help="Return a non-zero exit code if the PR does not have the required labels",
-    )

    return parser.parse_args()

@ -47,13 +41,10 @@ def main() -> None:
        if not has_required_labels(pr):
            print(LABEL_ERR_MSG)
            add_label_err_comment(pr)
-            if args.exit_non_zero:
-                sys.exit(1)
        else:
            delete_all_label_err_comments(pr)
    except Exception as e:
-        if args.exit_non_zero:
-            sys.exit(1)
+        pass

    sys.exit(0)

--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@ -77,7 +77,6 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
        "nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'"
@ -326,7 +325,6 @@ def generate_wheels_matrix(
    os: str,
    arches: Optional[List[str]] = None,
    python_versions: Optional[List[str]] = None,
-    use_split_build: bool = False,
 ) -> List[Dict[str, str]]:
    package_type = "wheel"
    if os == "linux" or os == "linux-aarch64" or os == "linux-s390x":
@ -334,7 +332,7 @@ def generate_wheels_matrix(
        package_type = "manywheel"

    if python_versions is None:
-        python_versions = FULL_PYTHON_VERSIONS + ["3.13", "3.13t"]
+        python_versions = FULL_PYTHON_VERSIONS + ["3.13"]

    if arches is None:
        # Define default compute archivectures
@ -342,7 +340,7 @@ def generate_wheels_matrix(
        if os == "linux":
            arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
        elif os == "windows":
-            arches += CUDA_ARCHES + XPU_ARCHES
+            arches += CUDA_ARCHES
        elif os == "linux-aarch64":
            # Only want the one arch as the CPU type is different and
            # uses different build/test scripts
@ -369,28 +367,11 @@ def generate_wheels_matrix(

            # TODO: Enable python 3.13 on rocm, aarch64, windows
            if (
-                gpu_arch_type == "rocm"
-                or os not in ["linux", "linux-s390x", "macos-arm64"]
-            ) and python_version in ["3.13", "3.13t"]:
+                gpu_arch_type == "rocm" or (os != "linux" and os != "linux-s390x")
+            ) and python_version == "3.13":
                continue

-            # TODO: Enable python 3.13t on xpu and cpu-s390x or MacOS
-            if (
-                gpu_arch_type in ["xpu", "cpu-s390x"] or os == "macos-arm64"
-            ) and python_version == "3.13t":
-                continue
-
-            if use_split_build and (
-                arch_version not in ["12.4", "12.1", "11.8", "cpu"] or os != "linux"
-            ):
-                raise RuntimeError(
-                    "Split build is only supported on linux with cuda 12.4, 12.1, 11.8, and cpu.\n"
-                    f"Currently attempting to build on arch version {arch_version} and os {os}.\n"
-                    "Please modify the matrix generation to exclude this combination."
-                )
-
            # 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
-
            if (
                arch_version in ["12.4", "12.1", "11.8"]
                and os == "linux"
@ -404,14 +385,13 @@ def generate_wheels_matrix(
                        "desired_cuda": translate_desired_cuda(
                            gpu_arch_type, gpu_arch_version
                        ),
-                        "use_split_build": "True" if use_split_build else "False",
                        "devtoolset": (
                            "cxx11-abi" if arch_version == "cuda-aarch64" else ""
                        ),
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
                        "pytorch_extra_install_requirements": (
-                            PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]
+                            PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]  # fmt: skip
                            if os != "linux-aarch64"
                            else ""
                        ),
@ -420,8 +400,7 @@ def generate_wheels_matrix(
                        ),
                    }
                )
-                # Special build building to use on Colab. Python 3.11 for 12.1 CUDA
-                if python_version == "3.11" and arch_version == "12.1":
+                if arch_version != "cuda-aarch64":
                    ret.append(
                        {
                            "python_version": python_version,
@ -430,16 +409,40 @@ def generate_wheels_matrix(
                            "desired_cuda": translate_desired_cuda(
                                gpu_arch_type, gpu_arch_version
                            ),
-                            "use_split_build": "True" if use_split_build else "False",
+                            "use_split_build": "True",
                            "devtoolset": "",
                            "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                            "package_type": package_type,
-                            "pytorch_extra_install_requirements": "",
-                            "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace(  # noqa: B950
+                            "pytorch_extra_install_requirements": (
+                                PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]  # fmt: skip
+                                if os != "linux-aarch64"
+                                else ""
+                            ),
+                            "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-split".replace(  # noqa: B950
                                ".", "_"
                            ),
                        }
                    )
+                    # Special build building to use on Colab. PyThon 3.10 for 12.1 CUDA
+                    if python_version == "3.10" and arch_version == "12.1":
+                        ret.append(
+                            {
+                                "python_version": python_version,
+                                "gpu_arch_type": gpu_arch_type,
+                                "gpu_arch_version": gpu_arch_version,
+                                "desired_cuda": translate_desired_cuda(
+                                    gpu_arch_type, gpu_arch_version
+                                ),
+                                "use_split_build": "False",
+                                "devtoolset": "",
+                                "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
+                                "package_type": package_type,
+                                "pytorch_extra_install_requirements": "",
+                                "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace(  # noqa: B950
+                                    ".", "_"
+                                ),
+                            }
+                        )
            else:
                ret.append(
                    {
@ -449,7 +452,6 @@ def generate_wheels_matrix(
                        "desired_cuda": translate_desired_cuda(
                            gpu_arch_type, gpu_arch_version
                        ),
-                        "use_split_build": "True" if use_split_build else "False",
                        "devtoolset": (
                            "cxx11-abi" if arch_version == "cpu-cxx11-abi" else ""
                        ),
@ -459,13 +461,12 @@ def generate_wheels_matrix(
                            ".", "_"
                        ),
                        "pytorch_extra_install_requirements": (
-                            PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"]
-                            if os != "linux" and gpu_arch_type != "xpu"
+                            PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"]  # fmt: skip
+                            if os != "linux"
                            else ""
                        ),
                    }
                )
-
    return ret


--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
@ -61,7 +61,6 @@ class BinaryBuildWorkflow:
    # Mainly for macos
    cross_compile_arm64: bool = False
    macos_runner: str = "macos-14-xlarge"
-    use_split_build: bool = False

    def __post_init__(self) -> None:
        if self.abi_version:
@ -70,9 +69,6 @@ class BinaryBuildWorkflow:
            )
        else:
            self.build_environment = f"{self.os}-binary-{self.package_type}"
-        if self.use_split_build:
-            # added to distinguish concurrency groups
-            self.build_environment += "-split"

    def generate_workflow_file(self, workflow_template: jinja2.Template) -> None:
        output_file_path = (
@ -114,20 +110,6 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
            isolated_workflow=True,
        ),
    ),
-    BinaryBuildWorkflow(
-        os=OperatingSystem.LINUX,
-        package_type="manywheel",
-        build_configs=generate_binary_build_matrix.generate_wheels_matrix(
-            OperatingSystem.LINUX,
-            use_split_build=True,
-            arches=["11.8", "12.1", "12.4", "cpu"],
-        ),
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
-            isolated_workflow=True,
-        ),
-        use_split_build=True,
-    ),
    BinaryBuildWorkflow(
        os=OperatingSystem.LINUX,
        package_type="conda",
@ -180,21 +162,6 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
        ),
        branches="main",
    ),
-    BinaryBuildWorkflow(
-        os=OperatingSystem.LINUX,
-        package_type="manywheel",
-        build_configs=generate_binary_build_matrix.generate_wheels_matrix(
-            OperatingSystem.LINUX,
-            arches=["11.8", "12.1", "12.4"],
-            python_versions=["3.9"],
-            use_split_build=True,
-        ),
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_PERIODIC},
-        ),
-        branches="main",
-        use_split_build=True,
-    ),
    BinaryBuildWorkflow(
        os=OperatingSystem.LINUX,
        package_type="libtorch",
--- a/.github/scripts/github_utils.py
+++ b/.github/scripts/github_utils.py
@ -46,24 +46,16 @@ def gh_fetch_url_and_headers(
        with urlopen(Request(url, headers=headers, data=data_, method=method)) as conn:
            return conn.headers, reader(conn)
    except HTTPError as err:
-        if (
-            err.code == 403
-            and all(
-                key in err.headers
-                for key in ["X-RateLimit-Limit", "X-RateLimit-Remaining"]
-            )
-            and int(err.headers["X-RateLimit-Remaining"]) == 0
+        if err.code == 403 and all(
+            key in err.headers for key in ["X-RateLimit-Limit", "X-RateLimit-Used"]
        ):
            print(
-                f"""{url}
-                Rate limit exceeded:
+                f"""Rate limit exceeded:
                Used: {err.headers['X-RateLimit-Used']}
                Limit: {err.headers['X-RateLimit-Limit']}
                Remaining: {err.headers['X-RateLimit-Remaining']}
                Resets at: {err.headers['x-RateLimit-Reset']}"""
            )
-        else:
-            print(f"Error fetching {url} {err}")
        raise


@ -168,14 +160,6 @@ def gh_post_commit_comment(
    )


-def gh_close_pr(org: str, repo: str, pr_num: int, dry_run: bool = False) -> None:
-    url = f"{GITHUB_API_URL}/repos/{org}/{repo}/pulls/{pr_num}"
-    if dry_run:
-        print(f"Dry run closing PR {pr_num}")
-    else:
-        gh_fetch_url(url, method="PATCH", data={"state": "closed"})
-
-
 def gh_delete_comment(org: str, repo: str, comment_id: int) -> None:
    url = f"{GITHUB_API_URL}/repos/{org}/{repo}/issues/comments/{comment_id}"
    gh_fetch_url(url, method="DELETE")
--- a/.github/scripts/lintrunner.sh
+++ b/.github/scripts/lintrunner.sh
@ -17,11 +17,6 @@ if [[ -d "${CACHE_DIRECTORY}" ]]; then
    cp -r "${CACHE_DIRECTORY}" . || true
 fi

-# if lintrunner is not installed, install it
-if ! command -v lintrunner &> /dev/null; then
-    python3 -m pip install lintrunner==0.12.5
-fi
-
 # This has already been cached in the docker image
 lintrunner init 2> /dev/null

@ -38,11 +33,10 @@ python3 torch/utils/data/datapipes/gen_pyi.py

 RC=0
 # Run lintrunner on all files
-if ! lintrunner --force-color --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
+if ! lintrunner --force-color --all-files --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
    echo ""
    echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
-    echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions. To apply suggested patches automatically, use the -a flag. Before pushing another commit,\e[0m"
-    echo -e "\e[1m\e[36mplease verify locally and ensure everything passes.\e[0m"
+    echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"
    RC=1
 fi

--- a/.github/scripts/runner_determinator.py
+++ b/.github/scripts/runner_determinator.py
@ -1,107 +1,51 @@
 # flake8: noqa: G004

-# Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
-#       must be kept in sync. You can do it easily by running the following command:
-#           python .github/scripts/update_runner_determinator.py
-
 """
 This runner determinator is used to determine which set of runners to run a
 GitHub job on. It uses the first comment of a GitHub issue (by default
-https://github.com/pytorch/test-infra/issues/5132) to define the configuration
-of which runners should be used to run which job.
-
-The configuration has two parts, the settings and a list of opted-in users,
-separated by a line containing "---".  If the line is not present, the
-settings are considered to be empty with only the second part, the user
-list, defined.
-
-The first part is a YAML block that defines the rollout settings. This can be
-used to define any settings that are needed to determine which runners to use.
-It's fields are defined by the RolloutSettings class below.
-
-The second part is a list of users who are explicitly opted in to the LF fleet.
-The user list is also a comma separated list of additional features or
-experiments which the user could be opted in to.
+https://github.com/pytorch/test-infra/issues/5132) as a user list to determine
+which users will get their jobs to run on experimental runners. This user list
+is also a comma separated list of additional features or experiments which the
+user could be opted in to.

 The user list has the following rules:

- Users are GitHub usernames, which must start with the @ prefix
+- Users are GitHub usernames with the @ prefix
+- If the first line is a "*" then all users will use the new runners
+- If the first line is a "!" then all users will use the old runners
 - Each user is also a comma-separated list of features/experiments to enable
- A "#" prefix opts the user out of all experiments
+- A "#" prefix indicates the user is opted out of the new runners but is opting
+  into features/experiments.

-Example config:
-    # A list of experiments that can be opted into.
-    # This defines the behavior they'll induce when opted into.
-    # Expected syntax is:
-    #   [experiment_name]: # Name of the experiment. Also used for the label prefix.
-    #      rollout_perc: [int] # % of workflows to run with this experiment when users are not opted in.
+Example user list:

-    experiments:
-      lf:
-        rollout_percent: 25
-        all_branches: false
-        default: true
-    ---
-
-    # Opt-ins:
-    # Users can opt into the LF fleet by adding their GitHub username to this list
-    # and specifying experiments to enable in a comma-separated list.
-    # Experiments should be from the above list.
-
-    @User1,lf,split_build
-    @User2,lf
-    @User3,split_build
+    @User1
+    @User2,amz2023
+    #@UserOptOutOfNewRunner,amz2023
 """

 import logging
 import os
-import random
 from argparse import ArgumentParser
 from logging import LogRecord
-from typing import Any, Dict, FrozenSet, Iterable, List, NamedTuple, Tuple
+from typing import Any, Iterable

-import yaml
 from github import Auth, Github
 from github.Issue import Issue


-DEFAULT_LABEL_PREFIX = ""  # use meta runners
+WORKFLOW_LABEL_META = ""  # use meta runners
 WORKFLOW_LABEL_LF = "lf."  # use runners from the linux foundation
 WORKFLOW_LABEL_LF_CANARY = "lf.c."  # use canary runners from the linux foundation

+RUNNER_AMI_LEGACY = ""
+RUNNER_AMI_AMZ2023 = "amz2023"
+
 GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "")
 GH_OUTPUT_KEY_AMI = "runner-ami"
 GH_OUTPUT_KEY_LABEL_TYPE = "label-type"


-SETTING_EXPERIMENTS = "experiments"
-
-LF_FLEET_EXPERIMENT = "lf"
-CANARY_FLEET_SUFFIX = ".c"
-
-
-class Experiment(NamedTuple):
-    rollout_perc: float = (
-        0  # Percentage of workflows to experiment on when user is not opted-in.
-    )
-    all_branches: bool = (
-        False  # If True, the experiment is also enabled on the exception branches
-    )
-    default: bool = (
-        True  # If True, the experiment is enabled by default for all queries
-    )
-
-    # Add more fields as needed
-
-
-class Settings(NamedTuple):
-    """
-    Settings for the experiments that can be opted into.
-    """
-
-    experiments: Dict[str, Experiment] = {}
-
-
 class ColorFormatter(logging.Formatter):
    """Color codes the log messages based on the log level"""

@ -144,12 +88,6 @@ def set_github_output(key: str, value: str) -> None:
        f.write(f"{key}={value}\n")


-def _str_comma_separated_to_set(value: str) -> FrozenSet[str]:
-    return frozenset(
-        filter(lambda itm: itm != "", map(str.strip, value.strip(" \n\t").split(",")))
-    )
-
-
 def parse_args() -> Any:
    parser = ArgumentParser("Get dynamic rollout settings")
    parser.add_argument("--github-token", type=str, required=True, help="GitHub token")
@ -184,13 +122,6 @@ def parse_args() -> Any:
        required=True,
        help="Current GitHub ref type, branch or tag",
    )
-    parser.add_argument(
-        "--eligible-experiments",
-        type=_str_comma_separated_to_set,
-        required=False,
-        default="",
-        help="comma separated list of experiments to check, if omitted all experiments marked with default=True are checked",
-    )

    return parser.parse_args()

@ -206,14 +137,11 @@ def get_issue(gh: Github, repo: str, issue_num: int) -> Issue:


 def get_potential_pr_author(
-    github_token: str, repo: str, username: str, ref_type: str, ref_name: str
+    gh: Github, repo: str, username: str, ref_type: str, ref_name: str
 ) -> str:
    # If the trigger was a new tag added by a bot, this is a ciflow case
    # Fetch the actual username from the original PR. The PR number is
    # embedded in the tag name: ciflow/<name>/<pr-number>
-
-    gh = get_gh_client(github_token)
-
    if username == "pytorch-bot[bot]" and ref_type == "tag":
        split_tag = ref_name.split("/")
        if (
@ -235,256 +163,126 @@ def get_potential_pr_author(


 def is_exception_branch(branch: str) -> bool:
-    """
-    Branches that get opted out of experiments by default, until they're explicitly enabled.
-    """
    return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}


-def load_yaml(yaml_text: str) -> Any:
+def get_workflow_type(issue: Issue, workflow_requestors: Iterable[str]) -> str:
    try:
-        data = yaml.safe_load(yaml_text)
-        return data
-    except yaml.YAMLError as exc:
-        log.exception("Error loading YAML")
-        raise
+        first_comment = issue.get_comments()[0].body.strip("\n\t ")

-
-def extract_settings_user_opt_in_from_text(rollout_state: str) -> Tuple[str, str]:
-    """
-    Extracts the text with settings, if any, and the opted in users from the rollout state.
-
-    If the issue body contains "---" then the text above that is the settings
-    and the text below is the list of opted in users.
-
-    If it doesn't contain "---" then the settings are empty and the rest is the users.
-    """
-    rollout_state_parts = rollout_state.split("---")
-    if len(rollout_state_parts) >= 2:
-        return rollout_state_parts[0], rollout_state_parts[1]
-    else:
-        return "", rollout_state
-
-
-class UserOptins(Dict[str, List[str]]):
-    """
-    Dictionary of users with a list of features they have opted into
-    """
-
-
-def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins:
-    """
-    Parse the user opt-in text into a key value pair of username and the list of features they have opted into
-
-    Users are GitHub usernames with the @ prefix. Each user is also a comma-separated list of features/experiments to enable.
-        - Example line: "@User1,lf,split_build"
-        - A "#" prefix indicates the user is opted out of all experiments
-
-
-    """
-    optins = UserOptins()
-    for user in user_optin_text.split("\n"):
-        user = user.strip("\r\n\t -")
-        if not user or not user.startswith("@"):
-            # Not a valid user. Skip
-            continue
-
-        if user:
-            usr_name = user.split(",")[0].strip("@")
-            optins[usr_name] = [exp.strip(" ") for exp in user.split(",")[1:]]
-
-    return optins
-
-
-def parse_settings_from_text(settings_text: str) -> Settings:
-    """
-    Parse the experiments from the issue body into a list of ExperimentSettings
-    """
-    try:
-        if settings_text:
-            # Escape the backtick as well so that we can have the settings in a code block on the GH issue
-            # for easy reading
-            # Note: Using ascii for the backtick so that the cat step in _runner-determinator.yml doesn't choke on
-            #       the backtick character in shell commands.
-            backtick = chr(96)  # backtick character
-            settings_text = settings_text.strip(f"\r\n\t{backtick} ")
-            settings = load_yaml(settings_text)
-
-            # For now we just load experiments. We can expand this if/when we add more settings
-            experiments = {}
-
-            for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
-                valid_settings = {}
-                for setting in exp_settings:
-                    if setting not in Experiment._fields:
-                        log.warning(
-                            f"Unexpected setting in experiment: {setting} = {exp_settings[setting]}"
-                        )
-                    else:
-                        valid_settings[setting] = exp_settings[setting]
-
-                experiments[exp_name] = Experiment(**valid_settings)
-            return Settings(experiments)
-
-    except Exception:
-        log.exception("Failed to parse settings")
-
-    return Settings()
-
-
-def parse_settings(rollout_state: str) -> Settings:
-    """
-    Parse settings, if any, from the rollout state.
-
-    If the issue body contains "---" then the text above that is the settings
-    and the text below is the list of opted in users.
-
-    If it doesn't contain "---" then the settings are empty and the default values are used.
-    """
-    settings_text, _ = extract_settings_user_opt_in_from_text(rollout_state)
-    return parse_settings_from_text(settings_text)
-
-
-def parse_users(rollout_state: str) -> UserOptins:
-    """
-    Parse users from the rollout state.
-
-    """
-    _, users_text = extract_settings_user_opt_in_from_text(rollout_state)
-    return parse_user_opt_in_from_text(users_text)
-
-
-def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
-    """
-    Check if a user is opted into an experiment
-    """
-    return experiment_name in user_optins.get(user, [])
-
-
-def get_runner_prefix(
-    rollout_state: str,
-    workflow_requestors: Iterable[str],
-    branch: str,
-    eligible_experiments: FrozenSet[str] = frozenset(),
-    is_canary: bool = False,
-) -> str:
-    settings = parse_settings(rollout_state)
-    user_optins = parse_users(rollout_state)
-
-    fleet_prefix = ""
-    prefixes = []
-    for experiment_name, experiment_settings in settings.experiments.items():
-        if not experiment_settings.all_branches and is_exception_branch(branch):
-            log.info(
-                f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
-            )
-            continue
-
-        if eligible_experiments:
-            if experiment_name not in eligible_experiments:
-                exp_list = ", ".join(eligible_experiments)
+        if first_comment[0] == "!":
+            log.info("LF Workflows are disabled for everyone. Using meta runners.")
+            return WORKFLOW_LABEL_META
+        elif first_comment[0] == "*":
+            log.info("LF Workflows are enabled for everyone. Using LF runners.")
+            return WORKFLOW_LABEL_LF
+        else:
+            all_opted_in_users = {
+                usr_raw.strip("\n\t@ ").split(",")[0]
+                for usr_raw in first_comment.split()
+            }
+            opted_in_requestors = {
+                usr for usr in workflow_requestors if usr in all_opted_in_users
+            }
+            if opted_in_requestors:
                log.info(
-                    f"Skipping experiment '{experiment_name}', as it is not in the eligible_experiments list: {exp_list}"
+                    f"LF Workflows are enabled for {', '.join(opted_in_requestors)}. Using LF runners."
                )
-                continue
-        elif not experiment_settings.default:
-            log.info(
-                f"Skipping experiment '{experiment_name}', as it is not a default experiment"
-            )
-            continue
-
-        # Is any workflow_requestor opted in to this experiment?
-        opted_in_users = [
-            requestor
-            for requestor in workflow_requestors
-            if is_user_opted_in(requestor, user_optins, experiment_name)
-        ]
-
-        enabled = False
-        if opted_in_users:
-            log.info(
-                f"{', '.join(opted_in_users)} have opted into experiment {experiment_name}."
-            )
-            enabled = True
-
-        elif experiment_settings.rollout_perc:
-            # If no user is opted in, then we randomly enable the experiment based on the rollout percentage
-            if random.uniform(0, 100) <= experiment_settings.rollout_perc:
-                log.info(
-                    f"Based on rollout percentage of {experiment_settings.rollout_perc}%, enabling experiment {experiment_name}."
-                )
-                enabled = True
-
-        if enabled:
-            label = experiment_name
-            if experiment_name == LF_FLEET_EXPERIMENT:
-                # We give some special treatment to the "lf" experiment since determines the fleet we use
-                #  - If it's enabled, then we always list it's prefix first
-                #  - If we're in the canary branch, then we append ".c" to the lf prefix
-                if is_canary:
-                    label += CANARY_FLEET_SUFFIX
-                fleet_prefix = label
+                return WORKFLOW_LABEL_LF
            else:
-                prefixes.append(label)
+                log.info(
+                    f"LF Workflows are disabled for {', '.join(workflow_requestors)}. Using meta runners."
+                )
+                return WORKFLOW_LABEL_META

-    if len(prefixes) > 1:
+    except Exception as e:
        log.error(
-            f"Only a fleet and one other experiment can be enabled for a job at any time. Enabling {prefixes[0]} and ignoring the rest, which are {', '.join(prefixes[1:])}"
+            f"Failed to get determine workflow type. Falling back to meta runners. Exception: {e}"
        )
-        prefixes = prefixes[:1]
-
-    # Fleet always comes first
-    if fleet_prefix:
-        prefixes.insert(0, fleet_prefix)
-
-    return ".".join(prefixes) + "." if prefixes else ""
+        return WORKFLOW_LABEL_META


-def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -> str:
-    """
-    Gets the first comment of the issue, which contains the desired rollout state.
+def get_optin_feature(
+    issue: Issue, workflow_requestors: Iterable[str], feature: str, fallback: str
+) -> str:
+    try:
+        first_comment = issue.get_comments()[0].body.strip("\n\t ")
+        userlist = {u.lstrip("#").strip("\n\t@ ") for u in first_comment.split()}
+        all_opted_in_users = set()
+        for user in userlist:
+            for i in user.split(","):
+                if i == feature:
+                    all_opted_in_users.add(user.split(",")[0])
+        opted_in_requestors = {
+            usr for usr in workflow_requestors if usr in all_opted_in_users
+        }

-    The default issue we use - https://github.com/pytorch/test-infra/issues/5132
-    """
-    gh = get_gh_client(github_token)
-    issue = get_issue(gh, repo, issue_num)
-    return str(issue.get_comments()[0].body.strip("\n\t "))
+        if opted_in_requestors:
+            log.info(
+                f"Feature {feature} is enabled for {', '.join(opted_in_requestors)}. Using feature {feature}."
+            )
+            return feature
+        else:
+            log.info(
+                f"Feature {feature} is disabled for {', '.join(workflow_requestors)}. Using fallback \"{fallback}\"."
+            )
+            return fallback
+
+    except Exception as e:
+        log.error(
+            f'Failed to determine if user has opted-in to feature {feature}. Using fallback "{fallback}". Exception: {e}'
+        )
+        return fallback


 def main() -> None:
    args = parse_args()

-    runner_label_prefix = DEFAULT_LABEL_PREFIX
+    if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
+        log.info(f"Exception branch: '{args.github_branch}', using meta runners")
+        label_type = WORKFLOW_LABEL_META
+        runner_ami = RUNNER_AMI_LEGACY
+    else:
+        try:
+            gh = get_gh_client(args.github_token)
+            # The default issue we use - https://github.com/pytorch/test-infra/issues/5132
+            issue = get_issue(gh, args.github_issue_repo, args.github_issue)
+            username = get_potential_pr_author(
+                gh,
+                args.github_repo,
+                args.github_actor,
+                args.github_ref_type,
+                args.github_branch,
+            )
+            label_type = get_workflow_type(
+                issue,
+                (
+                    args.github_issue_owner,
+                    username,
+                ),
+            )
+            runner_ami = get_optin_feature(
+                issue=issue,
+                workflow_requestors=(
+                    args.github_issue_owner,
+                    username,
+                ),
+                feature=RUNNER_AMI_AMZ2023,
+                fallback=RUNNER_AMI_LEGACY,
+            )
+        except Exception as e:
+            log.error(
+                f"Failed to get issue. Falling back to meta runners. Exception: {e}"
+            )
+            label_type = WORKFLOW_LABEL_META
+            runner_ami = RUNNER_AMI_LEGACY

-    try:
-        rollout_state = get_rollout_state_from_issue(
-            args.github_token, args.github_issue_repo, args.github_issue
-        )
+    # For Canary builds use canary runners
+    if args.github_repo == "pytorch/pytorch-canary" and label_type == WORKFLOW_LABEL_LF:
+        label_type = WORKFLOW_LABEL_LF_CANARY

-        username = get_potential_pr_author(
-            args.github_token,
-            args.github_repo,
-            args.github_actor,
-            args.github_ref_type,
-            args.github_branch,
-        )
-
-        is_canary = args.github_repo == "pytorch/pytorch-canary"
-
-        runner_label_prefix = get_runner_prefix(
-            rollout_state,
-            (args.github_issue_owner, username),
-            args.github_branch,
-            args.eligible_experiments,
-            is_canary,
-        )
-
-    except Exception as e:
-        log.error(
-            f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
-        )
-
-    set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
+    set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, label_type)
+    set_github_output(GH_OUTPUT_KEY_AMI, runner_ami)


 if __name__ == "__main__":
--- a/.github/scripts/s390x-ci/README.md
+++ b/.github/scripts/s390x-ci/README.md
@ -3,7 +3,7 @@
 ## Install prerequisites.

 ```
-$ sudo dnf install podman podman-docker jq
+$ sudo dnf install docker
 ```

 ## Add services.
@ -27,48 +27,23 @@ $ sudo systemctl enable --now qemu-user-static

 ## Rebuild the image

-First build s390x builder image `docker.io/pytorch/manylinuxs390x-builder`,
-using following commands:
-
-```
-$ cd ~
-$ git clone https://github.com/pytorch/pytorch
-$ cd pytorch
-$ git submodule update --init --recursive
-$ GPU_ARCH_TYPE=cpu-s390x "$(pwd)/.ci/docker/manywheel/build.sh" manylinuxs390x-builder
-$ docker image tag localhost/pytorch/manylinuxs390x-builder docker.io/pytorch/manylinuxs390x-builder:cpu-s390x
-$ docker image save -o ~/manywheel-s390x.tar docker.io/pytorch/manylinuxs390x-builder:cpu-s390x
-```
-
-Next step is to build `actions-runner` image using:
+In order to build or update the `iiilinuxibmcom/actions-runner` image, e.g. to get the
+latest OS security fixes, use the following commands:

 ```
 $ cd self-hosted-builder
 $ sudo docker build \
+      --build-arg repo=<owner>/<name> \
+      --build-arg token=<***> \
      --pull \
      -f actions-runner.Dockerfile \
-      -t iiilinuxibmcom/actions-runner.<name> \
+      -t iiilinuxibmcom/actions-runner \
      .
 ```

-If there are failures, ensure that selinux doesn't prevent it from working.
+If it fails, ensure that selinux doesn't prevent it from working.
 In worst case, selinux can be disabled with `setenforce 0`.

-Now prepare all necessary files for runner registration:
-
-```
-$ sudo mkdir -p /etc/actions-runner/<name>
-$ sudo chmod 700 /etc/actions-runner/<name>
-$ sudo /bin/cp <github_app_private_key_file> /etc/actions-runner/<name>/key_private.pem
-$ sudo echo <github_app_id> | sudo tee /etc/actions-runner/<name>/appid.env
-$ sudo echo <github_app_install_id> | sudo tee /etc/actions-runner/<name>/installid.env
-$ sudo echo NAME=<worker_name> | sudo tee    /etc/actions-runner/<name>/env
-$ sudo echo ORG=<github_org>   | sudo tee -a /etc/actions-runner/<name>/env
-$ cd self-hosted-builder
-$ sudo /bin/cp helpers/*.sh /usr/local/bin/
-$ sudo chmod 755 /usr/local/bin/app_token.sh /usr/local/bin/gh_token_generator.sh
-```
-
 ## Autostart the runner.

 ```
--- a/.github/scripts/s390x-ci/self-hosted-builder/actions-runner.Dockerfile
+++ b/.github/scripts/s390x-ci/self-hosted-builder/actions-runner.Dockerfile
@ -1,12 +1,12 @@
 # Self-Hosted IBM Z Github Actions Runner.

 # Temporary image: amd64 dependencies.
-FROM docker.io/amd64/ubuntu:23.10 as ld-prefix
+FROM docker.io/amd64/ubuntu:22.04 as ld-prefix
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get -y install ca-certificates libicu72 libssl3
+RUN apt-get update && apt-get -y install ca-certificates libicu70 libssl3

 # Main image.
-FROM docker.io/s390x/ubuntu:23.10
+FROM docker.io/s390x/ubuntu:22.04

 # Packages for pytorch building and testing.
 ENV DEBIAN_FRONTEND=noninteractive
@ -16,7 +16,6 @@ RUN apt-get update && apt-get -y install \
        gcc \
        git \
        jq \
-        zip \
        libxml2-dev \
        libxslt-dev \
        ninja-build \
@ -44,28 +43,24 @@ COPY fs/ /

 RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint

-# install podman
-RUN apt -y install podman podman-docker
-
 # amd64 Github Actions Runner.
 RUN useradd -m actions-runner
 USER actions-runner
 WORKDIR /home/actions-runner
+RUN curl -L https://github.com/actions/runner/releases/download/v2.309.0/actions-runner-linux-x64-2.309.0.tar.gz | tar -xz

-# set up python virtual environment which is later used by runner.
-# build workflows use "python -m pip install ...",
-# and it doesn't work for non-root user
-RUN virtualenv --system-site-packages venv
+# repository
+ARG repo

-# copy prebuilt manywheel docker image for builds and tests
-# build command is:
-# GPU_ARCH_TYPE=cpu-s390x "$(pwd)/manywheel/build_docker.sh"
-# and save command is:
-# docker image save -o manywheel-s390x.tar pytorch/manylinuxs390x-builder:cpu-s390x
-#
-COPY --chown=actions-runner:actions-runner manywheel-s390x.tar /home/actions-runner/manywheel-s390x.tar
+# repository token
+ARG token

-RUN curl -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz | tar -xz
+RUN ./config.sh \
+        --unattended \
+        --url "https://github.com/${repo}" \
+        --token "${token}" \
+        --no-default-labels \
+        --labels self-hosted,linux.s390x

 ENTRYPOINT ["/usr/bin/entrypoint"]
 CMD ["/usr/bin/actions-runner"]
--- a/.github/scripts/s390x-ci/self-hosted-builder/actions-runner@.service
+++ b/.github/scripts/s390x-ci/self-hosted-builder/actions-runner@.service
@ -8,16 +8,12 @@ StartLimitIntervalSec=0
 Type=simple
 Restart=always
 ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i
-ExecStartPre=-/usr/local/bin/gh_token_generator.sh /etc/actions-runner/%i/appid.env /etc/actions-runner/%i/installid.env /etc/actions-runner/%i/key_private.pem /etc/actions-runner/%i/ghtoken.env
 ExecStart=/usr/bin/docker run \
-              --env-file=/etc/actions-runner/%i/env \
-              --env-file=/etc/actions-runner/%i/ghtoken.env \
              --init \
              --interactive \
              --name=actions-runner.%i \
              --rm \
-              --privileged \
-              iiilinuxibmcom/actions-runner.%i
+              iiilinuxibmcom/actions-runner
 ExecStop=/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1"
 ExecStop=/bin/sh -c "docker wait actions-runner.%i"
 ExecStop=/bin/sh -c "docker rm actions-runner.%i"
--- a/.github/scripts/s390x-ci/self-hosted-builder/fs/usr/bin/actions-runner
+++ b/.github/scripts/s390x-ci/self-hosted-builder/fs/usr/bin/actions-runner
@ -2,45 +2,5 @@

 set -e -u

-# first import docker image
-if [ -f ./manywheel-s390x.tar ] ; then
-        docker image load --input manywheel-s390x.tar
-        docker image tag docker.io/pytorch/manylinuxs390x-builder:cpu-s390x docker.io/pytorch/manylinuxs390x-builder:cpu-s390x-main
-        rm -f manywheel-s390x.tar
-fi
-
-token_file=registration-token.json
-
-# Generate registration token
-curl \
-        -X POST \
-        -H "Accept: application/vnd.github.v3+json" \
-        -H "Authorization: Bearer ${ACCESS_TOKEN}" \
-        "https://api.github.com/orgs/${ORG}/actions/runners/registration-token" \
-        -o "$token_file"
-
-unset ACCESS_TOKEN
-
-# register runner as ephemeral runner
-# it does one job, stops and unregisters
-registration_token=$(jq --raw-output .token "$token_file")
-
-./config.sh \
-        --unattended \
-        --ephemeral \
-        --url "https://github.com/${ORG}" \
-        --token "${registration_token}" \
-        --name "${NAME}" \
-        --no-default-labels \
-        --labels self-hosted,linux.s390x
-
-unset registration_token
-rm -f "$token_file"
-
-# enter into python virtual environment.
-# build workflows use "python -m pip install ...",
-# and it doesn't work for non-root user
-source venv/bin/activate
-
 # Run one job.
-./run.sh
+./run.sh --once
--- a/.github/scripts/s390x-ci/self-hosted-builder/helpers/app_token.sh
+++ b/.github/scripts/s390x-ci/self-hosted-builder/helpers/app_token.sh
@ -1,84 +0,0 @@
-#!/usr/bin/env bash
-#
-# Request an ACCESS_TOKEN to be used by a GitHub APP
-# Environment variable that need to be set up:
-# * APP_ID, the GitHub's app ID
-# * INSTALL_ID, the Github's app's installation ID
-# * APP_PRIVATE_KEY, the content of GitHub app's private key in PEM format.
-#
-# https://github.com/orgs/community/discussions/24743#discussioncomment-3245300
-#
-
-set -o pipefail
-
-_GITHUB_HOST=${GITHUB_HOST:="github.com"}
-
-# If URL is not github.com then use the enterprise api endpoint
-if [[ ${GITHUB_HOST} = "github.com" ]]; then
-  URI="https://api.${_GITHUB_HOST}"
-else
-  URI="https://${_GITHUB_HOST}/api/v3"
-fi
-
-API_VERSION=v3
-API_HEADER="Accept: application/vnd.github.${API_VERSION}+json"
-CONTENT_LENGTH_HEADER="Content-Length: 0"
-APP_INSTALLATIONS_URI="${URI}/app/installations"
-
-
-# JWT parameters based off
-# https://docs.github.com/en/developers/apps/building-github-apps/authenticating-with-github-apps#authenticating-as-a-github-app
-#
-# JWT token issuance and expiration parameters
-JWT_IAT_DRIFT=60
-JWT_EXP_DELTA=600
-
-JWT_JOSE_HEADER='{
-    "alg": "RS256",
-    "typ": "JWT"
-}'
-
-
-build_jwt_payload() {
-    now=$(date +%s)
-    iat=$((now - JWT_IAT_DRIFT))
-    jq -c \
-        --arg iat_str "${iat}" \
-        --arg exp_delta_str "${JWT_EXP_DELTA}" \
-        --arg app_id_str "${APP_ID}" \
-    '
-        ($iat_str | tonumber) as $iat
-        | ($exp_delta_str | tonumber) as $exp_delta
-        | ($app_id_str | tonumber) as $app_id
-        | .iat = $iat
-        | .exp = ($iat + $exp_delta)
-        | .iss = $app_id
-    ' <<< "{}" | tr -d '\n'
-}
-
-base64url() {
-    base64 | tr '+/' '-_' | tr -d '=\n'
-}
-
-rs256_sign() {
-    openssl dgst -binary -sha256 -sign <(echo "$1")
-}
-
-request_access_token() {
-    jwt_payload=$(build_jwt_payload)
-    encoded_jwt_parts=$(base64url <<<"${JWT_JOSE_HEADER}").$(base64url <<<"${jwt_payload}")
-    encoded_mac=$(echo -n "$encoded_jwt_parts" | rs256_sign "${APP_PRIVATE_KEY}" | base64url)
-    generated_jwt="${encoded_jwt_parts}.${encoded_mac}"
-
-    auth_header="Authorization: Bearer ${generated_jwt}"
-
-    app_installations_response=$(curl -sX POST \
-        -H "${auth_header}" \
-        -H "${API_HEADER}" \
-        --header "X-GitHub-Api-Version: 2022-11-28" \
-        --url "https://api.github.com/app/installations/${INSTALL_ID}/access_tokens" \
-    )
-    echo "$app_installations_response" | jq --raw-output '.token'
-}
-
-request_access_token
--- a/.github/scripts/s390x-ci/self-hosted-builder/helpers/gh_token_generator.sh
+++ b/.github/scripts/s390x-ci/self-hosted-builder/helpers/gh_token_generator.sh
@ -1,10 +0,0 @@
-#!/usr/bin/env bash
-
-SCRIPT_DIR=$(dirname "$0")
-APP_ID=$1
-INSTALL_ID=$2
-APP_PRIVATE_KEY=$3
-DST_FILE="$4"
-
-ACCESS_TOKEN="$(APP_ID="$(<"${APP_ID}")" INSTALL_ID="$(<"${INSTALL_ID}")" APP_PRIVATE_KEY="$(<"${APP_PRIVATE_KEY}")" "${SCRIPT_DIR}/app_token.sh")"
-echo "ACCESS_TOKEN=${ACCESS_TOKEN}" > "${DST_FILE}"
--- a/.github/scripts/sync_distributed_folder_prototype.sh
+++ b/.github/scripts/sync_distributed_folder_prototype.sh
@ -0,0 +1,35 @@
+#!/bin/bash
+
+set -eoux pipefail
+
+SYNC_BRANCH=pytorch-stable-prototype
+
+git config user.email "fake@example.com"
+git config user.name  "PyTorch Stable Bot"
+
+git fetch origin main
+git fetch origin "$SYNC_BRANCH"
+git checkout "$SYNC_BRANCH"
+
+# Using a hardcoded SHA here is a massive speedup as we can skip the entire history of the pytorch GitHub repo.
+# This specific SHA was chosen as it was before the "branch point" of the stable branch
+for SHA in $(git log ba3b05fdf37ddbc3c301294d6a560a816335e717..origin/main --pretty="%h" -- torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed)
+do
+    # `git merge-base --is-ancestor` exits with code 0 if the given SHA is an ancestor, and non-0 otherwise
+    if git merge-base --is-ancestor $SHA HEAD || [[ $(git log --grep="(cherry picked from commit $SHA") ]]
+    then
+        echo "Skipping $SHA"
+        continue
+    fi
+    echo "Copying $SHA"
+    git cherry-pick -x "$SHA" -X theirs
+    git reset --soft HEAD~1
+    git add torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed
+    git checkout .
+    git commit --reuse-message=HEAD@{1}
+    git clean -f
+done
+
+if [[ "${WITH_PUSH}" == true ]]; then
+  git push
+fi
--- a/.github/scripts/tag_docker_images_for_release.py
+++ b/.github/scripts/tag_docker_images_for_release.py
@ -51,8 +51,6 @@ def main() -> None:

    for platform_image in platform_images:  # type: ignore[attr-defined]
        for arch in platform_image.keys():  # type: ignore[attr-defined]
-            if arch == "cpu-s390x":
-                continue
            tag_image(
                platform_image[arch],  # type: ignore[index]
                default_tag,
--- a/.github/scripts/test_check_labels.py
+++ b/.github/scripts/test_check_labels.py
@ -18,7 +18,6 @@ def mock_parse_args() -> object:
    class Object:
        def __init__(self) -> None:
            self.pr_num = 76123
-            self.exit_non_zero = False

    return Object()

--- a/.github/scripts/test_runner_determinator.py
+++ b/.github/scripts/test_runner_determinator.py
@ -1,440 +0,0 @@
-from unittest import main, TestCase
-from unittest.mock import Mock, patch
-
-import runner_determinator as rd
-
-
-USER_BRANCH = "somebranch"
-EXCEPTION_BRANCH = "main"
-
-
-class TestRunnerDeterminatorIssueParser(TestCase):
-    def test_parse_settings(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 25
-            otherExp:
-                rollout_perc: 0
-                default: false
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        settings = rd.parse_settings(settings_text)
-
-        self.assertTupleEqual(
-            rd.Experiment(rollout_perc=25),
-            settings.experiments["lf"],
-            "lf settings not parsed correctly",
-        )
-        self.assertTupleEqual(
-            rd.Experiment(rollout_perc=0, default=False),
-            settings.experiments["otherExp"],
-            "otherExp settings not parsed correctly",
-        )
-
-    def test_parse_settings_in_code_block(self) -> None:
-        settings_text = """
-
-        ```
-        experiments:
-            lf:
-                rollout_perc: 25
-            otherExp:
-                rollout_perc: 0
-                default: false
-        ```
-
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        settings = rd.parse_settings(settings_text)
-
-        self.assertTupleEqual(
-            rd.Experiment(rollout_perc=25),
-            settings.experiments["lf"],
-            "lf settings not parsed correctly",
-        )
-        self.assertTupleEqual(
-            rd.Experiment(rollout_perc=0, default=False),
-            settings.experiments["otherExp"],
-            "otherExp settings not parsed correctly",
-        )
-
-    def test_parse_all_branches_setting(self) -> None:
-        settings_text = """
-        ```
-        experiments:
-            lf:
-                rollout_perc: 25
-                all_branches: true
-            otherExp:
-                all_branches: True
-                rollout_perc: 0
-        ```
-
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        settings = rd.parse_settings(settings_text)
-
-        self.assertTupleEqual(
-            rd.Experiment(rollout_perc=25, all_branches=True),
-            settings.experiments["lf"],
-            "lf settings not parsed correctly",
-        )
-        self.assertTrue(settings.experiments["otherExp"].all_branches)
-        self.assertTupleEqual(
-            rd.Experiment(rollout_perc=0, all_branches=True),
-            settings.experiments["otherExp"],
-            "otherExp settings not parsed correctly",
-        )
-
-    def test_parse_users(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        users = rd.parse_users(settings_text)
-        self.assertDictEqual(
-            {"User1": ["lf"], "User2": ["lf", "otherExp"]},
-            users,
-            "Users not parsed correctly",
-        )
-
-    def test_parse_users_without_settings(self) -> None:
-        settings_text = """
-
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        users = rd.parse_users(settings_text)
-        self.assertDictEqual(
-            {"User1": ["lf"], "User2": ["lf", "otherExp"]},
-            users,
-            "Users not parsed correctly",
-        )
-
-
-class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
-    def test_opted_in_user(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
-        self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")
-
-    def test_opted_in_user_two_experiments(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-        prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
-        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")
-
-    def test_opted_in_user_two_experiments_default(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-                default: false
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-        prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
-        self.assertEqual("lf.", prefix, "Runner prefix not correct for User2")
-
-    def test_opted_in_user_two_experiments_default_exp(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-                default: false
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-        prefix = rd.get_runner_prefix(
-            settings_text, ["User2"], USER_BRANCH, frozenset(["lf", "otherExp"])
-        )
-        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")
-
-    def test_opted_in_user_two_experiments_default_exp_2(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-                default: false
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-        prefix = rd.get_runner_prefix(
-            settings_text, ["User2"], USER_BRANCH, frozenset(["otherExp"])
-        )
-        self.assertEqual("otherExp.", prefix, "Runner prefix not correct for User2")
-
-    @patch("random.uniform", return_value=50)
-    def test_opted_out_user(self, mock_uniform: Mock) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 25
-            otherExp:
-                rollout_perc: 25
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
-        self.assertEqual("", prefix, "Runner prefix not correct for user")
-
-    @patch("random.uniform", return_value=10)
-    def test_opted_out_user_was_pulled_in_by_rollout(self, mock_uniform: Mock) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 25
-            otherExp:
-                rollout_perc: 25
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        # User3 is opted out, but is pulled into both experiments by the 10% rollout
-        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
-        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
-
-    @patch("random.uniform", return_value=10)
-    def test_opted_out_user_was_pulled_in_by_rollout_excl_nondefault(
-        self, mock_uniform: Mock
-    ) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 25
-            otherExp:
-                rollout_perc: 25
-                default: false
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        # User3 is opted out, but is pulled into default experiments by the 10% rollout
-        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
-        self.assertEqual("lf.", prefix, "Runner prefix not correct for user")
-
-    @patch("random.uniform", return_value=10)
-    def test_opted_out_user_was_pulled_in_by_rollout_filter_exp(
-        self, mock_uniform: Mock
-    ) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 25
-            otherExp:
-                rollout_perc: 25
-                default: false
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        # User3 is opted out, but is pulled into default experiments by the 10% rollout
-        prefix = rd.get_runner_prefix(
-            settings_text, ["User3"], USER_BRANCH, frozenset(["otherExp"])
-        )
-        self.assertEqual("otherExp.", prefix, "Runner prefix not correct for user")
-
-    @patch("random.uniform", return_value=25)
-    def test_opted_out_user_was_pulled_out_by_rollout_filter_exp(
-        self, mock_uniform: Mock
-    ) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 10
-            otherExp:
-                rollout_perc: 50
-                default: false
-        ---
-
-        Users:
-        @User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        # User3 is opted out, but is pulled into default experiments by the 10% rollout
-        prefix = rd.get_runner_prefix(settings_text, ["User3"], USER_BRANCH)
-        self.assertEqual("", prefix, "Runner prefix not correct for user")
-
-    def test_lf_prefix_always_comes_first(self) -> None:
-        settings_text = """
-        experiments:
-            otherExp:
-                rollout_perc: 0
-            lf:
-                rollout_perc: 0
-        ---
-
-        Users:
-        @User1,lf
-        @User2,otherExp,lf
-
-        """
-
-        prefix = rd.get_runner_prefix(settings_text, ["User2"], USER_BRANCH)
-        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
-
-    def test_ignores_commented_users(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-        ---
-
-        Users:
-        #@User1,lf
-        @User2,lf,otherExp
-
-        """
-
-        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
-        self.assertEqual("", prefix, "Runner prefix not correct for user")
-
-    def test_ignores_extra_experiments(self) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 0
-            otherExp:
-                rollout_perc: 0
-            foo:
-                rollout_perc: 0
-        ---
-
-        Users:
-        @User1,lf,otherExp,foo
-
-        """
-
-        prefix = rd.get_runner_prefix(settings_text, ["User1"], USER_BRANCH)
-        self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
-
-    def test_disables_experiment_on_exception_branches_when_not_explicitly_opted_in(
-        self,
-    ) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 100
-        ---
-
-        Users:
-        @User,lf,otherExp
-
-        """
-
-        prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
-        self.assertEqual("", prefix, "Runner prefix not correct for user")
-
-    def test_allows_experiment_on_exception_branches_when_explicitly_opted_in(
-        self,
-    ) -> None:
-        settings_text = """
-        experiments:
-            lf:
-                rollout_perc: 100
-                all_branches: true
-        ---
-
-        Users:
-        @User,lf,otherExp
-
-        """
-
-        prefix = rd.get_runner_prefix(settings_text, ["User1"], EXCEPTION_BRANCH)
-        self.assertEqual("lf.", prefix, "Runner prefix not correct for user")
-
-
-if __name__ == "__main__":
-    main()
--- a/.github/scripts/test_trymerge.py
+++ b/.github/scripts/test_trymerge.py
@ -12,7 +12,7 @@ import json
 import os
 import warnings
 from hashlib import sha256
-from typing import Any, List, Optional
+from typing import Any, Dict, List, Optional
 from unittest import main, mock, skip, TestCase
 from urllib.error import HTTPError

@ -24,6 +24,7 @@ from trymerge import (
    find_matching_merge_rule,
    get_classifications,
    get_drci_classifications,
+    get_rockset_results,
    gh_get_team_members,
    GitHubPR,
    JobCheckState,
@ -41,6 +42,7 @@ if "GIT_REMOTE_URL" not in os.environ:
    os.environ["GIT_REMOTE_URL"] = "https://github.com/pytorch/pytorch"

 GQL_MOCKS = "gql_mocks.json.gz"
+ROCKSET_MOCKS = "rockset_mocks.json.gz"
 DRCI_MOCKS = "drci_mocks.json.gz"


@ -75,11 +77,16 @@ def mock_query(
        if err.code == 401 or err.code == 403:
            err_msg = f"If you are seeing this message during workflow run, please make sure to update {file_name}"
            err_msg += f" locally, by deleting it and running {os.path.basename(__file__)} with"
-            err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN"
+            err_msg += " GitHub Personal Access Token passed via GITHUB_TOKEN,"
+            err_msg += " the rockset api key passed via ROCKSET_API_KEY,"
            err_msg += " and drci api key passed via DRCI_BOT_KEY environment variables"
-            if os.getenv("GITHUB_TOKEN") is None or os.getenv("DRCI_BOT_KEY") is None:
+            if (
+                os.getenv("GITHUB_TOKEN") is None
+                or os.getenv("ROCKSET_API_KEY") is None
+                or os.getenv("DRCI_BOT_KEY") is None
+            ):
                err_msg = (
-                    "Failed to update cached queries as GITHUB_TOKEN or DRCI_BOT_KEY "
+                    "Failed to update cached queries as GITHUB_TOKEN or ROCKSET_API_KEY or DRCI_BOT_KEY "
                    + "is not defined. "
                    + err_msg
                )
@ -103,6 +110,16 @@ def mocked_gh_graphql(query: str, **kwargs: Any) -> Any:
    return mock_query(gh_graphql_wrapper, GQL_MOCKS, key_function, query, kwargs)


+def mocked_rockset_results(head_sha: str, merge_base: str, num_retries: int = 3) -> Any:
+    return mock_query(
+        get_rockset_results,
+        ROCKSET_MOCKS,
+        lambda x, y: f"{x} {y}",
+        head_sha,
+        merge_base,
+    )
+
+
 def mocked_drci_classifications(pr_num: int, project: str, num_retries: int = 3) -> Any:
    return mock_query(
        get_drci_classifications,
@ -256,6 +273,10 @@ def xla_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule]:
    ]


+def empty_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
+    return []
+
+
 class DummyGitRepo(GitRepo):
    def __init__(self) -> None:
        super().__init__(get_git_repo_dir(), get_git_remote_name())
@ -267,6 +288,7 @@ class DummyGitRepo(GitRepo):
        return "super awsome commit message"


+@mock.patch("trymerge.get_rockset_results", side_effect=empty_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch(
    "trymerge.get_drci_classifications", side_effect=mocked_drci_classifications
@ -582,6 +604,7 @@ class TestTryMerge(TestCase):
            mocked_gh_fetch_merge_base.assert_called_once()


+@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
@ -820,7 +843,7 @@ class TestBypassFailures(TestCase):
        checks = pr.get_checkrun_conclusions()

        # Known flaky failure takes precedence over ignore current (need to set the
-        # merge base here to get the results from Dr. CI, and that categorize the
+        # merge base here to get the results from Rockset, and that categorize the
        # broken trunk failure too
        checks = get_classifications(
            pr.pr_num,
@ -906,6 +929,7 @@ class TestBypassFailures(TestCase):
        )


+@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch("trymerge.get_drci_classifications", return_value={})
@ -984,6 +1008,7 @@ class TestBypassFailuresOnSandCastle(TestCase):
        self.assertTrue(len(failed) == 2)


+@mock.patch("trymerge.get_rockset_results", side_effect=mocked_rockset_results)
@mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql)
@mock.patch("trymerge.gh_fetch_merge_base", return_value="")
@mock.patch(
--- a/.github/scripts/trymerge.py
+++ b/.github/scripts/trymerge.py
@ -36,7 +36,6 @@ from warnings import warn

 import yaml
 from github_utils import (
-    gh_close_pr,
    gh_fetch_json_list,
    gh_fetch_merge_base,
    gh_fetch_url,
@ -452,6 +451,8 @@ RE_DIFF_REV = re.compile(r"^Differential Revision:.+?(D[0-9]+)", re.MULTILINE)
 CIFLOW_LABEL = re.compile(r"^ciflow/.+")
 CIFLOW_TRUNK_LABEL = re.compile(r"^ciflow/trunk")
 MERGE_RULE_PATH = Path(".github") / "merge_rules.yaml"
+ROCKSET_MERGES_COLLECTION = "merges"
+ROCKSET_MERGES_WORKSPACE = "commons"
 REMOTE_MAIN_BRANCH = "origin/main"
 DRCI_CHECKRUN_NAME = "Dr.CI"
 INTERNAL_CHANGES_CHECKRUN_NAME = "Meta Internal-Only Changes Check"
@ -1173,12 +1174,12 @@ class GitHubPR:
            for pr in additional_merged_prs:
                pr.add_numbered_label(MERGE_COMPLETE_LABEL, dry_run)

-        # When the merge process reaches this part, we can assume that the commit
-        # has been successfully pushed to trunk
-        merge_commit_sha = repo.rev_parse(name=self.default_branch())
-
        if comment_id and self.pr_num:
-            # Finally, upload the record to s3. The list of pending and failed
+            # When the merge process reaches this part, we can assume that the commit
+            # has been successfully pushed to trunk
+            merge_commit_sha = repo.rev_parse(name=REMOTE_MAIN_BRANCH)
+
+            # Finally, upload the record to Rockset. The list of pending and failed
            # checks are at the time of the merge
            save_merge_record(
                comment_id=comment_id,
@ -1200,18 +1201,7 @@ class GitHubPR:
                ignore_current=bool(ignore_current_checks),
            )
        else:
-            print("Missing comment ID or PR number, couldn't upload to s3")
-
-        # Usually Github will see that the commit has "resolves <pr_num>" in the
-        # commit message and close the PR, but sometimes it doesn't, leading to
-        # confusion.  When it doesn't, we close it manually.
-        time.sleep(60)  # Give Github some time to close the PR
-        manually_close_merged_pr(
-            pr=self,
-            additional_merged_prs=additional_merged_prs,
-            merge_commit_sha=merge_commit_sha,
-            dry_run=dry_run,
-        )
+            print("Missing comment ID or PR number, couldn't upload to Rockset")

    def merge_changes(
        self,
@ -1479,7 +1469,7 @@ def find_matching_merge_rule(

        # Categorize all checks when skip_mandatory_checks (force merge) is set. Do it here
        # where the list of checks is readily available. These records will be saved into
-        # s3 merge records
+        # Rockset merge records
        (
            pending_mandatory_checks,
            failed_mandatory_checks,
@ -1506,41 +1496,13 @@ def checks_to_str(checks: List[Tuple[str, Optional[str]]]) -> str:


 def checks_to_markdown_bullets(
-    checks: List[Tuple[str, Optional[str], Optional[int]]],
+    checks: List[Tuple[str, Optional[str], Optional[int]]]
 ) -> List[str]:
    return [
        f"- [{c[0]}]({c[1]})" if c[1] is not None else f"- {c[0]}" for c in checks[:5]
    ]


-def manually_close_merged_pr(
-    pr: GitHubPR,
-    additional_merged_prs: List[GitHubPR],
-    merge_commit_sha: str,
-    dry_run: bool,
-) -> None:
-    def _comment_and_close(pr: GitHubPR, comment: str) -> None:
-        pr = GitHubPR(pr.org, pr.project, pr.pr_num)  # Refresh the PR
-        if not pr.is_closed():
-            gh_post_pr_comment(pr.org, pr.project, pr.pr_num, comment, dry_run)
-            gh_close_pr(pr.org, pr.project, pr.pr_num, dry_run)
-
-    message = (
-        f"This PR (#{pr.pr_num}) was merged in {merge_commit_sha} but it is still open, likely due to a Github bug, "
-        "so mergebot is closing it manually.  If you think this is a mistake, please feel free to reopen and contact Dev Infra."
-    )
-    _comment_and_close(pr, message)
-    for additional_pr in additional_merged_prs:
-        message = (
-            f"This PR (#{additional_pr.pr_num}) was merged as part of PR #{pr.pr_num} in the stack under {merge_commit_sha} "
-            "but it is still open, likely due to a Github bug, so mergebot is closing it manually. "
-            "If you think this is a mistake, please feel free to reopen and contact Dev Infra."
-        )
-        _comment_and_close(additional_pr, message)
-
-    print(f"PR {pr.pr_num} and all additional PRs in the stack have been closed.")
-
-
@retries_decorator()
 def save_merge_record(
    comment_id: int,
@ -1566,7 +1528,7 @@ def save_merge_record(
    This saves the merge records as a json, which can later be uploaded to s3
    """

-    # Prepare the record to be written into s3
+    # Prepare the record to be written into Rockset
    data = [
        {
            "comment_id": comment_id,
@ -1588,8 +1550,7 @@ def save_merge_record(
            "ignore_current": ignore_current,
            "error": error,
            # This is a unique identifier for the record for deduping purposes
-            # in Rockset.  Any unique string would work.  This will not be used
-            # after we migrate off Rockset
+            # in rockset.  Any unique string would work
            "_id": f"{project}-{pr_num}-{comment_id}-{os.environ.get('GITHUB_RUN_ID')}",
        }
    ]
@ -1599,6 +1560,36 @@ def save_merge_record(
        json.dump(data, f)


+@retries_decorator(rc=[])
+def get_rockset_results(head_sha: str, merge_base: str) -> List[Dict[str, Any]]:
+    query = f"""
+SELECT
+    w.name as workflow_name,
+    j.id,
+    j.name,
+    j.conclusion,
+    j.completed_at,
+    j.html_url,
+    j.head_sha,
+    j.torchci_classification.captures as failure_captures,
+    LENGTH(j.steps) as steps,
+FROM
+    commons.workflow_job j join commons.workflow_run w on w.id = j.run_id
+where
+    j.head_sha in ('{head_sha}','{merge_base}')
+"""
+    try:
+        import rockset  # type: ignore[import]
+
+        res = rockset.RocksetClient(
+            host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
+        ).sql(query)
+        return cast(List[Dict[str, Any]], res.results)
+    except ModuleNotFoundError:
+        print("Could not use RockSet as rocket dependency is missing")
+        return []
+
+
@retries_decorator()
 def get_drci_classifications(pr_num: int, project: str = "pytorch") -> Any:
    """
@ -2036,7 +2027,7 @@ def categorize_checks(
    pending_checks: List[Tuple[str, Optional[str], Optional[int]]] = []
    failed_checks: List[Tuple[str, Optional[str], Optional[int]]] = []

-    # failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on s3
+    # failed_checks_categorization is used to keep track of all ignorable failures when saving the merge record on Rockset
    failed_checks_categorization: Dict[str, List[Any]] = defaultdict(list)

    # If required_checks is not set or empty, consider all names are relevant
@ -2095,7 +2086,7 @@ def categorize_checks(
    ):
        failed_checks = failed_checks + flaky_or_broken_trunk

-    # The list of failed_checks_categorization is returned so that it can be saved into the s3 merge record
+    # The list of failed_checks_categorization is returned so that it can be saved into the Rockset merge record
    return (pending_checks, failed_checks, failed_checks_categorization)


@ -2379,7 +2370,7 @@ def main() -> None:
        handle_exception(e)

        if args.comment_id and args.pr_num:
-            # Finally, upload the record to s3, we don't have access to the
+            # Finally, upload the record to Rockset, we don't have access to the
            # list of pending and failed checks here, but they are not really
            # needed at the moment
            save_merge_record(
@ -2402,7 +2393,7 @@ def main() -> None:
                error=str(e),
            )
        else:
-            print("Missing comment ID or PR number, couldn't upload to s3")
+            print("Missing comment ID or PR number, couldn't upload to Rockset")
    finally:
        if not args.check_mergeability:
            gh_remove_label(
--- a/.github/scripts/update_runner_determinator.py
+++ b/.github/scripts/update_runner_determinator.py
@ -1,31 +0,0 @@
-#!/usr/bin/env python3
-
-import re
-
-
-# Read the contents of runner_determinator.py
-with open(".github/scripts/runner_determinator.py") as script_file:
-    script_content = script_file.read()
-
-# Indent the script content by 10 spaces to match destination indentation
-indented_script_content = "\n".join(
-    [" " * 10 + line if line else line for line in script_content.splitlines()]
-)
-
-# Read the contents of _runner-determinator.yml
-with open(".github/workflows/_runner-determinator.yml") as yml_file:
-    yml_content = yml_file.read()
-
-# Replace the content between the markers
-new_yml_content = re.sub(
-    r"(cat <<EOF > runner_determinator.py\n)(.*?)(\n\s+EOF)",
-    lambda match: match.group(1) + indented_script_content + match.group(3),
-    yml_content,
-    flags=re.DOTALL,
-)
-
-# Save the modified content back to _runner-determinator.yml
-with open(".github/workflows/_runner-determinator.yml", "w") as yml_file:
-    yml_file.write(new_yml_content)
-
-print("Updated _runner-determinator.yml with the contents of runner_determinator.py")
--- a/.github/templates/common.yml.j2
+++ b/.github/templates/common.yml.j2
@ -1,7 +1,7 @@
 {%- set upload_artifact_s3_action = "seemethere/upload-artifact-s3@v5" -%}
 {%- set download_artifact_s3_action = "seemethere/download-artifact-s3@v4" -%}
-{%- set upload_artifact_action = "actions/upload-artifact@v4.4.0" -%}
-{%- set download_artifact_action = "actions/download-artifact@v4.1.7" -%}
+{%- set upload_artifact_action = "actions/upload-artifact@v3" -%}
+{%- set download_artifact_action = "actions/download-artifact@v3" -%}

 {%- set timeout_minutes = 240 -%}

--- a/.github/templates/linux_binary_build_workflow.yml.j2
+++ b/.github/templates/linux_binary_build_workflow.yml.j2
@ -54,7 +54,7 @@ env:
 jobs:
  get-label-type:
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: ./.github/workflows/_runner-determinator.yml
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -68,7 +68,6 @@ jobs:
    needs: get-label-type
    with:!{{ upload.binary_env_as_input(config) }}
      {%- if "aarch64" in build_environment %}
-      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.m7g.4xlarge.ephemeral
      ALPINE_IMAGE: "arm64v8/alpine"
      {%- elif "s390x" in build_environment %}
@ -103,7 +102,6 @@ jobs:
      build_name: !{{ config["build_name"] }}
      build_environment: !{{ build_environment }}
      {%- if "aarch64" in build_environment %}
-      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
      {%- elif "s390x" in build_environment %}
--- a/.github/templates/macos_binary_build_workflow.yml.j2
+++ b/.github/templates/macos_binary_build_workflow.yml.j2
@ -101,7 +101,7 @@ jobs:
          # shellcheck disable=SC1091
          source "${RUNNER_TEMP}/anaconda/bin/activate"
          "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
-      - uses: actions/upload-artifact@v4.4.0
+      - uses: actions/upload-artifact@v3
        if: always()
        with:
          name: !{{ config["build_name"] }}
--- a/.github/templates/upload.yml.j2
+++ b/.github/templates/upload.yml.j2
@ -45,7 +45,7 @@
  {%- if is_windows %}
      # This is a dummy value for libtorch to work correctly with our batch scripts
      # without this value pip does not get installed for some reason
-      DESIRED_PYTHON: "3.9"
+      DESIRED_PYTHON: "3.8"
  {%- endif %}

 {%- else %}
--- a/.github/templates/windows_binary_build_workflow.yml.j2
+++ b/.github/templates/windows_binary_build_workflow.yml.j2
@ -55,7 +55,7 @@ env:
 jobs:
  get-label-type:
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: ./.github/workflows/_runner-determinator.yml
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -104,9 +104,9 @@ jobs:
      - get-label-type
 {%- if config["gpu_arch_type"] == "cuda" %}
 {%- if branches == "nightly" %}
-    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 {%- else %}
-    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge.nonephemeral"
+    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu.nonephemeral"
 {%- endif %}
 {%- else %}
    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"
--- a/.github/workflows/_bazel-build-test.yml
+++ b/.github/workflows/_bazel-build-test.yml
@ -91,14 +91,14 @@ jobs:
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

-      - name: Check if in a container runner
+      - name: Check if in a ARC runner
        shell: bash
-        id: check_container_runner
-        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
+        id: check_arc_runner
+        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-        if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
+        if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}

      - name: Output disk space left
        run: |
--- a/.github/workflows/_binary-build-linux.yml
+++ b/.github/workflows/_binary-build-linux.yml
@ -272,8 +272,6 @@ jobs:
          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
          if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
            docker exec -t "${container_name}" bash -c "bash /builder/aarch64_linux/aarch64_ci_build.sh"
-          elif [[ ${{ inputs.PACKAGE_TYPE }} == "manywheel" || ${{ inputs.PACKAGE_TYPE }} == "libtorch" ]]; then
-            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ inputs.PACKAGE_TYPE }}/build.sh"
          else
            docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/${{ inputs.PACKAGE_TYPE }}/build.sh"
          fi
@ -285,7 +283,7 @@ jobs:
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .

-      - uses: actions/upload-artifact@v4.4.0
+      - uses: actions/upload-artifact@v3
        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
        with:
          name: ${{ inputs.build_name }}
--- a/.github/workflows/_binary-test-linux.yml
+++ b/.github/workflows/_binary-test-linux.yml
@ -210,7 +210,7 @@ jobs:

      - name: Download Build Artifacts
        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v3
        with:
          name: ${{ inputs.build_name }}
          path: "${{ runner.temp }}/artifacts/"
--- a/.github/workflows/_binary-upload.yml
+++ b/.github/workflows/_binary-upload.yml
@ -126,7 +126,7 @@ jobs:
        # NB: When the previous build job is skipped, there won't be any artifacts and
        # this step will fail. Binary build jobs can only be skipped on CI, not nightly
        continue-on-error: true
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v3
        with:
          name: ${{ inputs.build_name }}
          path: "${{ runner.temp }}/artifacts/"
--- a/.github/workflows/_ios-build-test.yml
+++ b/.github/workflows/_ios-build-test.yml
@ -292,7 +292,7 @@ jobs:
          bundler-cache: true

      - name: Download arm64 artifacts
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v3
        with:
          name: pytorch-ios-build-artifacts-arm64

--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@ -109,7 +109,6 @@ jobs:
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

@ -119,16 +118,13 @@ jobs:
      # checkout. In other cases you should prefer a local checkout.
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
-        with:
-          no-sudo: ${{ inputs.build-environment == 'linux-s390x-binary-manywheel' }}

      - name: Setup Linux
        uses: ./.github/actions/setup-linux
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'

      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v3
-        if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
+        if: ${{ inputs.aws-role-to-assume != '' }}
        with:
          role-to-assume: ${{ inputs.aws-role-to-assume }}
          role-session-name: gha-linux-build
@ -137,13 +133,11 @@ jobs:
      - name: Calculate docker image
        id: calculate-docker-image
        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Use following to pull public copy of the image
        id: print-ghcr-mirror
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        env:
          ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
        shell: bash
@ -153,7 +147,6 @@ jobs:

      - name: Pull docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -181,7 +174,6 @@ jobs:
      - name: Download pytest cache
        uses: ./.github/actions/pytest-cache-download
        continue-on-error: true
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          cache_dir: .pytest_cache
          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
@ -203,7 +195,6 @@ jobs:
          PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
          TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
-          DOCKER_IMAGE_S390X: ${{ inputs.docker-image-name }}
          XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
          DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
          OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
@ -211,21 +202,7 @@ jobs:
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
        run: |
-          if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
-            JENKINS_USER=
-            USED_IMAGE="${DOCKER_IMAGE_S390X}"
-
-            # since some steps are skipped on s390x, if they are necessary, run them here
-            env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
-            env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
-          else
-            JENKINS_USER="--user jenkins"
-            USED_IMAGE="${DOCKER_IMAGE}"
-          fi
-
          # detached container should get cleaned up by teardown_ec2_linux
-          # Used for JENKINS_USER, which can be empty
-          # shellcheck disable=SC2086
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e MAX_JOBS="$(nproc --ignore=2)" \
@ -248,10 +225,10 @@ jobs:
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
-            ${JENKINS_USER} \
+            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
-            "${USED_IMAGE}"
+            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'

@ -262,7 +239,7 @@ jobs:

      - name: Store PyTorch Build Artifacts on S3
        uses: seemethere/upload-artifact-s3@v5
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build
        with:
          name: ${{ inputs.build-environment }}
          retention-days: 14
@ -272,7 +249,7 @@ jobs:

      - name: Store PyTorch Build Artifacts on S3 for split build
        uses: seemethere/upload-artifact-s3@v5
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build
        with:
          name: ${{ inputs.build-environment }}-experimental-split-build
          retention-days: 14
@ -280,26 +257,8 @@ jobs:
          path: artifacts.zip
          s3-bucket: ${{ inputs.s3-bucket }}

-      - name: Store PyTorch Build Artifacts for s390x
-        uses: actions/upload-artifact@v3
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
-        with:
-          name: ${{ inputs.build-environment }}
-          retention-days: 14
-          if-no-files-found: error
-          path: artifacts.zip
-
-      - name: Store PyTorch Build Artifacts for s390x for split build
-        uses: actions/upload-artifact@v3
-        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
-        with:
-          name: ${{ inputs.build-environment }}-experimental-split-build
-          retention-days: 14
-          if-no-files-found: error
-          path: artifacts.zip
-
      - name: Upload sccache stats
-        if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
+        if: steps.build.outcome != 'skipped'
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-prefix: |
@ -311,13 +270,4 @@ jobs:

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
-        if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'
-
-      - name: Cleanup docker
-        if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
-        shell: bash
-        run: |
-          # on s390x stop the container for clean worker stop
-          # ignore expansion of "docker ps -q" since it could be empty
-          # shellcheck disable=SC2046
-          docker stop $(docker ps -q) || true
+        if: always()
--- a/.github/workflows/_linux-test.yml
+++ b/.github/workflows/_linux-test.yml
@ -114,32 +114,22 @@ jobs:
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

-      - name: Check if in a container runner
+      - name: Check if in a ARC runner
        shell: bash
-        id: check_container_runner
-        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
+        id: check_arc_runner
+        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        id: install-nvidia-driver
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
-
-      - name: Setup GPU_FLAG for docker run
-        id: setup-gpu-flag
-        run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
-        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
-
-      - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
-        id: setup-sscache-port-flag
-        run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
-        if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' }}
+        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}

      - name: Lock NVIDIA A100 40GB Frequency
        run: |
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          nvidia-smi
-        if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
+        if: contains(matrix.runner, 'a100')

      - name: Start monitoring script
        id: monitor-script
@ -218,7 +208,6 @@ jobs:
          NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
          TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
-          SCCACHE_REGION: us-east-1
          SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
          SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
          DOCKER_IMAGE: ${{ inputs.docker-image }}
@ -229,7 +218,6 @@ jobs:
          DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-          IS_A100_RUNNER: ${{ contains(matrix.runner, 'a100') && '1' || '0' }}

        run: |
          set -x
@ -248,7 +236,6 @@ jobs:
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
-            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e BUILD_ENVIRONMENT \
            -e PR_NUMBER \
            -e GITHUB_ACTIONS \
@ -278,7 +265,6 @@ jobs:
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
-            -e SCCACHE_REGION \
            -e SCCACHE_S3_KEY_PREFIX \
            -e XLA_CUDA \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
@ -288,7 +274,6 @@ jobs:
            -e HUGGING_FACE_HUB_TOKEN \
            -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
            -e DASHBOARD_TAG \
-            -e IS_A100_RUNNER \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
@ -358,7 +343,7 @@ jobs:

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
-        if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
+        if: always()

      # NB: We are currently having an intermittent GPU-related issue on G5 runners with
      # A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does
--- a/.github/workflows/_mac-test-mps.yml
+++ b/.github/workflows/_mac-test-mps.yml
@ -88,13 +88,6 @@ jobs:
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
          pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt

-      - name: Get workflow job id
-        id: get-job-id
-        uses: ./.github/actions/get-workflow-job-id
-        if: always()
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
      - name: Install PyTorch and run MPS tests
        id: test
        env:
@ -110,14 +103,6 @@ jobs:
          NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }}
          NO_TD: ${{ needs.filter.outputs.ci-no-td }}
          PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
-          GITHUB_REPOSITORY: ${{ github.repository }}
-          GITHUB_WORKFLOW: ${{ github.workflow }}
-          GITHUB_JOB: ${{ github.job }}
-          GITHUB_RUN_ID: ${{ github.run_id }}
-          GITHUB_RUN_NUMBER: ${{ github.run_number }}
-          GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
-          JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-          JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
          REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
        run: |
          # shellcheck disable=SC1090
@ -159,6 +144,13 @@ jobs:
        run: |
          cat test/**/*_toprint.log || true

+      - name: Get workflow job id
+        id: get-job-id
+        uses: ./.github/actions/get-workflow-job-id
+        if: always()
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
      - name: Upload test artifacts
        uses: ./.github/actions/upload-test-artifacts
        if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
--- a/.github/workflows/_runner-determinator.yml
+++ b/.github/workflows/_runner-determinator.yml
@ -3,11 +3,6 @@ name: Check whether the workflow owner can use ARC runners
 on:
  workflow_call:
    inputs:
-      check_experiments:
-        required: false
-        type: string
-        description: |
-          List of experiments for this workfow. If not defined, all default experiments are included.
      triggering_actor:
        required: true
        type: string
@ -48,7 +43,6 @@ jobs:
      ISSUE_NUMBER: ${{ inputs.issue_number }}
      TRIGGERING_ACTOR: ${{ inputs.triggering_actor }}
      ISSUE_OWNER: ${{ inputs.issue_owner }}
-      CHECK_EXPERIMENTS: ${{ inputs.check_experiments }}
    steps:
      # - name: Checkout PyTorch
      #   uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
@ -65,108 +59,52 @@ jobs:
          cat <<EOF > runner_determinator.py
          # flake8: noqa: G004

-          # Note: Copies of this script in runner_determinator.py and _runner-determinator.yml
-          #       must be kept in sync. You can do it easily by running the following command:
-          #           python .github/scripts/update_runner_determinator.py
-
          """
          This runner determinator is used to determine which set of runners to run a
          GitHub job on. It uses the first comment of a GitHub issue (by default
-          https://github.com/pytorch/test-infra/issues/5132) to define the configuration
-          of which runners should be used to run which job.
-
-          The configuration has two parts, the settings and a list of opted-in users,
-          separated by a line containing "---".  If the line is not present, the
-          settings are considered to be empty with only the second part, the user
-          list, defined.
-
-          The first part is a YAML block that defines the rollout settings. This can be
-          used to define any settings that are needed to determine which runners to use.
-          It's fields are defined by the RolloutSettings class below.
-
-          The second part is a list of users who are explicitly opted in to the LF fleet.
-          The user list is also a comma separated list of additional features or
-          experiments which the user could be opted in to.
+          https://github.com/pytorch/test-infra/issues/5132) as a user list to determine
+          which users will get their jobs to run on experimental runners. This user list
+          is also a comma separated list of additional features or experiments which the
+          user could be opted in to.

          The user list has the following rules:

-          - Users are GitHub usernames, which must start with the @ prefix
+          - Users are GitHub usernames with the @ prefix
+          - If the first line is a "*" then all users will use the new runners
+          - If the first line is a "!" then all users will use the old runners
          - Each user is also a comma-separated list of features/experiments to enable
-          - A "#" prefix opts the user out of all experiments
+          - A "#" prefix indicates the user is opted out of the new runners but is opting
+            into features/experiments.

-          Example config:
-              # A list of experiments that can be opted into.
-              # This defines the behavior they'll induce when opted into.
-              # Expected syntax is:
-              #   [experiment_name]: # Name of the experiment. Also used for the label prefix.
-              #      rollout_perc: [int] # % of workflows to run with this experiment when users are not opted in.
+          Example user list:

-              experiments:
-                lf:
-                  rollout_percent: 25
-                  all_branches: false
-                  default: true
-              ---
-
-              # Opt-ins:
-              # Users can opt into the LF fleet by adding their GitHub username to this list
-              # and specifying experiments to enable in a comma-separated list.
-              # Experiments should be from the above list.
-
-              @User1,lf,split_build
-              @User2,lf
-              @User3,split_build
+              @User1
+              @User2,amz2023
+              #@UserOptOutOfNewRunner,amz2023
          """

          import logging
          import os
-          import random
          from argparse import ArgumentParser
          from logging import LogRecord
-          from typing import Any, Dict, FrozenSet, Iterable, List, NamedTuple, Tuple
+          from typing import Any, Iterable

-          import yaml
          from github import Auth, Github
          from github.Issue import Issue


-          DEFAULT_LABEL_PREFIX = ""  # use meta runners
+          WORKFLOW_LABEL_META = ""  # use meta runners
          WORKFLOW_LABEL_LF = "lf."  # use runners from the linux foundation
          WORKFLOW_LABEL_LF_CANARY = "lf.c."  # use canary runners from the linux foundation

+          RUNNER_AMI_LEGACY = ""
+          RUNNER_AMI_AMZ2023 = "amz2023"
+
          GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "")
          GH_OUTPUT_KEY_AMI = "runner-ami"
          GH_OUTPUT_KEY_LABEL_TYPE = "label-type"


-          SETTING_EXPERIMENTS = "experiments"
-
-          LF_FLEET_EXPERIMENT = "lf"
-          CANARY_FLEET_SUFFIX = ".c"
-
-
-          class Experiment(NamedTuple):
-              rollout_perc: float = (
-                  0  # Percentage of workflows to experiment on when user is not opted-in.
-              )
-              all_branches: bool = (
-                  False  # If True, the experiment is also enabled on the exception branches
-              )
-              default: bool = (
-                  True  # If True, the experiment is enabled by default for all queries
-              )
-
-              # Add more fields as needed
-
-
-          class Settings(NamedTuple):
-              """
-              Settings for the experiments that can be opted into.
-              """
-
-              experiments: Dict[str, Experiment] = {}
-
-
          class ColorFormatter(logging.Formatter):
              """Color codes the log messages based on the log level"""

@ -209,12 +147,6 @@ jobs:
                  f.write(f"{key}={value}\n")


-          def _str_comma_separated_to_set(value: str) -> FrozenSet[str]:
-              return frozenset(
-                  filter(lambda itm: itm != "", map(str.strip, value.strip(" \n\t").split(",")))
-              )
-
-
          def parse_args() -> Any:
              parser = ArgumentParser("Get dynamic rollout settings")
              parser.add_argument("--github-token", type=str, required=True, help="GitHub token")
@ -249,13 +181,6 @@ jobs:
                  required=True,
                  help="Current GitHub ref type, branch or tag",
              )
-              parser.add_argument(
-                  "--eligible-experiments",
-                  type=_str_comma_separated_to_set,
-                  required=False,
-                  default="",
-                  help="comma separated list of experiments to check, if omitted all experiments marked with default=True are checked",
-              )

              return parser.parse_args()

@ -271,14 +196,11 @@ jobs:


          def get_potential_pr_author(
-              github_token: str, repo: str, username: str, ref_type: str, ref_name: str
+              gh: Github, repo: str, username: str, ref_type: str, ref_name: str
          ) -> str:
              # If the trigger was a new tag added by a bot, this is a ciflow case
              # Fetch the actual username from the original PR. The PR number is
              # embedded in the tag name: ciflow/<name>/<pr-number>
-
-              gh = get_gh_client(github_token)
-
              if username == "pytorch-bot[bot]" and ref_type == "tag":
                  split_tag = ref_name.split("/")
                  if (
@ -300,261 +222,130 @@ jobs:


          def is_exception_branch(branch: str) -> bool:
-              """
-              Branches that get opted out of experiments by default, until they're explicitly enabled.
-              """
              return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}


-          def load_yaml(yaml_text: str) -> Any:
+          def get_workflow_type(issue: Issue, workflow_requestors: Iterable[str]) -> str:
              try:
-                  data = yaml.safe_load(yaml_text)
-                  return data
-              except yaml.YAMLError as exc:
-                  log.exception("Error loading YAML")
-                  raise
+                  first_comment = issue.get_comments()[0].body.strip("\n\t ")

-
-          def extract_settings_user_opt_in_from_text(rollout_state: str) -> Tuple[str, str]:
-              """
-              Extracts the text with settings, if any, and the opted in users from the rollout state.
-
-              If the issue body contains "---" then the text above that is the settings
-              and the text below is the list of opted in users.
-
-              If it doesn't contain "---" then the settings are empty and the rest is the users.
-              """
-              rollout_state_parts = rollout_state.split("---")
-              if len(rollout_state_parts) >= 2:
-                  return rollout_state_parts[0], rollout_state_parts[1]
-              else:
-                  return "", rollout_state
-
-
-          class UserOptins(Dict[str, List[str]]):
-              """
-              Dictionary of users with a list of features they have opted into
-              """
-
-
-          def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins:
-              """
-              Parse the user opt-in text into a key value pair of username and the list of features they have opted into
-
-              Users are GitHub usernames with the @ prefix. Each user is also a comma-separated list of features/experiments to enable.
-                  - Example line: "@User1,lf,split_build"
-                  - A "#" prefix indicates the user is opted out of all experiments
-
-
-              """
-              optins = UserOptins()
-              for user in user_optin_text.split("\n"):
-                  user = user.strip("\r\n\t -")
-                  if not user or not user.startswith("@"):
-                      # Not a valid user. Skip
-                      continue
-
-                  if user:
-                      usr_name = user.split(",")[0].strip("@")
-                      optins[usr_name] = [exp.strip(" ") for exp in user.split(",")[1:]]
-
-              return optins
-
-
-          def parse_settings_from_text(settings_text: str) -> Settings:
-              """
-              Parse the experiments from the issue body into a list of ExperimentSettings
-              """
-              try:
-                  if settings_text:
-                      # Escape the backtick as well so that we can have the settings in a code block on the GH issue
-                      # for easy reading
-                      # Note: Using ascii for the backtick so that the cat step in _runner-determinator.yml doesn't choke on
-                      #       the backtick character in shell commands.
-                      backtick = chr(96)  # backtick character
-                      settings_text = settings_text.strip(f"\r\n\t{backtick} ")
-                      settings = load_yaml(settings_text)
-
-                      # For now we just load experiments. We can expand this if/when we add more settings
-                      experiments = {}
-
-                      for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
-                          valid_settings = {}
-                          for setting in exp_settings:
-                              if setting not in Experiment._fields:
-                                  log.warning(
-                                      f"Unexpected setting in experiment: {setting} = {exp_settings[setting]}"
-                                  )
-                              else:
-                                  valid_settings[setting] = exp_settings[setting]
-
-                          experiments[exp_name] = Experiment(**valid_settings)
-                      return Settings(experiments)
-
-              except Exception:
-                  log.exception("Failed to parse settings")
-
-              return Settings()
-
-
-          def parse_settings(rollout_state: str) -> Settings:
-              """
-              Parse settings, if any, from the rollout state.
-
-              If the issue body contains "---" then the text above that is the settings
-              and the text below is the list of opted in users.
-
-              If it doesn't contain "---" then the settings are empty and the default values are used.
-              """
-              settings_text, _ = extract_settings_user_opt_in_from_text(rollout_state)
-              return parse_settings_from_text(settings_text)
-
-
-          def parse_users(rollout_state: str) -> UserOptins:
-              """
-              Parse users from the rollout state.
-
-              """
-              _, users_text = extract_settings_user_opt_in_from_text(rollout_state)
-              return parse_user_opt_in_from_text(users_text)
-
-
-          def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
-              """
-              Check if a user is opted into an experiment
-              """
-              return experiment_name in user_optins.get(user, [])
-
-
-          def get_runner_prefix(
-              rollout_state: str,
-              workflow_requestors: Iterable[str],
-              branch: str,
-              eligible_experiments: FrozenSet[str] = frozenset(),
-              is_canary: bool = False,
-          ) -> str:
-              settings = parse_settings(rollout_state)
-              user_optins = parse_users(rollout_state)
-
-              fleet_prefix = ""
-              prefixes = []
-              for experiment_name, experiment_settings in settings.experiments.items():
-                  if not experiment_settings.all_branches and is_exception_branch(branch):
-                      log.info(
-                          f"Branch {branch} is an exception branch. Not enabling experiment {experiment_name}."
-                      )
-                      continue
-
-                  if eligible_experiments:
-                      if experiment_name not in eligible_experiments:
-                          exp_list = ", ".join(eligible_experiments)
+                  if first_comment[0] == "!":
+                      log.info("LF Workflows are disabled for everyone. Using meta runners.")
+                      return WORKFLOW_LABEL_META
+                  elif first_comment[0] == "*":
+                      log.info("LF Workflows are enabled for everyone. Using LF runners.")
+                      return WORKFLOW_LABEL_LF
+                  else:
+                      all_opted_in_users = {
+                          usr_raw.strip("\n\t@ ").split(",")[0]
+                          for usr_raw in first_comment.split()
+                      }
+                      opted_in_requestors = {
+                          usr for usr in workflow_requestors if usr in all_opted_in_users
+                      }
+                      if opted_in_requestors:
                          log.info(
-                              f"Skipping experiment '{experiment_name}', as it is not in the eligible_experiments list: {exp_list}"
+                              f"LF Workflows are enabled for {', '.join(opted_in_requestors)}. Using LF runners."
                          )
-                          continue
-                  elif not experiment_settings.default:
-                      log.info(
-                          f"Skipping experiment '{experiment_name}', as it is not a default experiment"
-                      )
-                      continue
-
-                  # Is any workflow_requestor opted in to this experiment?
-                  opted_in_users = [
-                      requestor
-                      for requestor in workflow_requestors
-                      if is_user_opted_in(requestor, user_optins, experiment_name)
-                  ]
-
-                  enabled = False
-                  if opted_in_users:
-                      log.info(
-                          f"{', '.join(opted_in_users)} have opted into experiment {experiment_name}."
-                      )
-                      enabled = True
-
-                  elif experiment_settings.rollout_perc:
-                      # If no user is opted in, then we randomly enable the experiment based on the rollout percentage
-                      if random.uniform(0, 100) <= experiment_settings.rollout_perc:
-                          log.info(
-                              f"Based on rollout percentage of {experiment_settings.rollout_perc}%, enabling experiment {experiment_name}."
-                          )
-                          enabled = True
-
-                  if enabled:
-                      label = experiment_name
-                      if experiment_name == LF_FLEET_EXPERIMENT:
-                          # We give some special treatment to the "lf" experiment since determines the fleet we use
-                          #  - If it's enabled, then we always list it's prefix first
-                          #  - If we're in the canary branch, then we append ".c" to the lf prefix
-                          if is_canary:
-                              label += CANARY_FLEET_SUFFIX
-                          fleet_prefix = label
+                          return WORKFLOW_LABEL_LF
                      else:
-                          prefixes.append(label)
+                          log.info(
+                              f"LF Workflows are disabled for {', '.join(workflow_requestors)}. Using meta runners."
+                          )
+                          return WORKFLOW_LABEL_META

-              if len(prefixes) > 1:
+              except Exception as e:
                  log.error(
-                      f"Only a fleet and one other experiment can be enabled for a job at any time. Enabling {prefixes[0]} and ignoring the rest, which are {', '.join(prefixes[1:])}"
+                      f"Failed to get determine workflow type. Falling back to meta runners. Exception: {e}"
                  )
-                  prefixes = prefixes[:1]
-
-              # Fleet always comes first
-              if fleet_prefix:
-                  prefixes.insert(0, fleet_prefix)
-
-              return ".".join(prefixes) + "." if prefixes else ""
+                  return WORKFLOW_LABEL_META


-          def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -> str:
-              """
-              Gets the first comment of the issue, which contains the desired rollout state.
+          def get_optin_feature(
+              issue: Issue, workflow_requestors: Iterable[str], feature: str, fallback: str
+          ) -> str:
+              try:
+                  first_comment = issue.get_comments()[0].body.strip("\n\t ")
+                  userlist = {u.lstrip("#").strip("\n\t@ ") for u in first_comment.split()}
+                  all_opted_in_users = set()
+                  for user in userlist:
+                      for i in user.split(","):
+                          if i == feature:
+                              all_opted_in_users.add(user.split(",")[0])
+                  opted_in_requestors = {
+                      usr for usr in workflow_requestors if usr in all_opted_in_users
+                  }

-              The default issue we use - https://github.com/pytorch/test-infra/issues/5132
-              """
-              gh = get_gh_client(github_token)
-              issue = get_issue(gh, repo, issue_num)
-              return str(issue.get_comments()[0].body.strip("\n\t "))
+                  if opted_in_requestors:
+                      log.info(
+                          f"Feature {feature} is enabled for {', '.join(opted_in_requestors)}. Using feature {feature}."
+                      )
+                      return feature
+                  else:
+                      log.info(
+                          f"Feature {feature} is disabled for {', '.join(workflow_requestors)}. Using fallback \"{fallback}\"."
+                      )
+                      return fallback
+
+              except Exception as e:
+                  log.error(
+                      f'Failed to determine if user has opted-in to feature {feature}. Using fallback "{fallback}". Exception: {e}'
+                  )
+                  return fallback


          def main() -> None:
              args = parse_args()

-              runner_label_prefix = DEFAULT_LABEL_PREFIX
+              if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
+                  log.info(f"Exception branch: '{args.github_branch}', using meta runners")
+                  label_type = WORKFLOW_LABEL_META
+                  runner_ami = RUNNER_AMI_LEGACY
+              else:
+                  try:
+                      gh = get_gh_client(args.github_token)
+                      # The default issue we use - https://github.com/pytorch/test-infra/issues/5132
+                      issue = get_issue(gh, args.github_issue_repo, args.github_issue)
+                      username = get_potential_pr_author(
+                          gh,
+                          args.github_repo,
+                          args.github_actor,
+                          args.github_ref_type,
+                          args.github_branch,
+                      )
+                      label_type = get_workflow_type(
+                          issue,
+                          (
+                              args.github_issue_owner,
+                              username,
+                          ),
+                      )
+                      runner_ami = get_optin_feature(
+                          issue=issue,
+                          workflow_requestors=(
+                              args.github_issue_owner,
+                              username,
+                          ),
+                          feature=RUNNER_AMI_AMZ2023,
+                          fallback=RUNNER_AMI_LEGACY,
+                      )
+                  except Exception as e:
+                      log.error(
+                          f"Failed to get issue. Falling back to meta runners. Exception: {e}"
+                      )
+                      label_type = WORKFLOW_LABEL_META
+                      runner_ami = RUNNER_AMI_LEGACY

-              try:
-                  rollout_state = get_rollout_state_from_issue(
-                      args.github_token, args.github_issue_repo, args.github_issue
-                  )
+              # For Canary builds use canary runners
+              if args.github_repo == "pytorch/pytorch-canary" and label_type == WORKFLOW_LABEL_LF:
+                  label_type = WORKFLOW_LABEL_LF_CANARY

-                  username = get_potential_pr_author(
-                      args.github_token,
-                      args.github_repo,
-                      args.github_actor,
-                      args.github_ref_type,
-                      args.github_branch,
-                  )
-
-                  is_canary = args.github_repo == "pytorch/pytorch-canary"
-
-                  runner_label_prefix = get_runner_prefix(
-                      rollout_state,
-                      (args.github_issue_owner, username),
-                      args.github_branch,
-                      args.eligible_experiments,
-                      is_canary,
-                  )
-
-              except Exception as e:
-                  log.error(
-                      f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
-                  )
-
-              set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
+              set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, label_type)
+              set_github_output(GH_OUTPUT_KEY_AMI, runner_ami)


          if __name__ == "__main__":
              main()
-
          EOF

          cat runner_determinator.py
@ -576,5 +367,4 @@ jobs:
            --github-actor "$TRIGGERING_ACTOR" \
            --github-issue-owner "$ISSUE_OWNER" \
            --github-ref-type "$curr_ref_type" \
-            --github-repo "$GITHUB_REPOSITORY" \
-            --eligible-experiments "$CHECK_EXPERIMENTS" \
+            --github-repo "$GITHUB_REPOSITORY"
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .1.0
 .0.0