Triton wheel build using 2.3.x branch (#122403)

* Triton build 2.3.x
* Revert "[Release Only] Build triton using pinned version rather branch (#121765)"
  This reverts commit d69c4219127e2cf5d9637b0daacc0a24e65f8133.
* Triton wheel change
* release
necessary change to make torch2.3 work with triton2.2 (#122139)
2025-11-02 23:15:01 +08:00 · 2024-03-21 12:52:21 -04:00 · 2024-03-21 08:24:53 -04:00 · 2024-03-14 17:56:46 -04:00 · 2024-03-13 19:50:57 -04:00 · 2024-03-13 11:03:48 -04:00
5933 changed files with 507345 additions and 212800 deletions
--- a/.bazelignore
+++ b/.bazelignore
@ -1,4 +1,3 @@
 # We do not use this library in our Bazel build. It contains an
 # infinitely recursing symlink that makes Bazel very unhappy.
 third_party/ittapi/
-third_party/opentelemetry-cpp
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -84,27 +84,13 @@ fi
 # CMake 3.18 is needed to support CUDA17 language variant
 CMAKE_VERSION=3.18.5

-_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
-_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
+_UCX_COMMIT=00bcc6bb18fc282eb160623b4c0d300147f579af
+_UCC_COMMIT=7cb07a76ccedad7e56ceb136b865eb9319c258ea

 # It's annoying to rename jobs every time you want to rewrite a
 # configuration, so we hardcode everything here rather than do it
 # from scratch
 case "$image" in
-  pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
-    CUDA_VERSION=12.4.0
-    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    CONDA_CMAKE=yes
-    TRITON=yes
-    ;;
  pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
    CUDA_VERSION=12.1.1
    CUDNN_VERSION=8
@ -119,21 +105,6 @@ case "$image" in
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
-  pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9-inductor-benchmarks)
-    CUDA_VERSION=12.4.0
-    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    CONDA_CMAKE=yes
-    TRITON=yes
-    INDUCTOR_BENCHMARKS=yes
-    ;;
  pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks)
    CUDA_VERSION=12.1.1
    CUDNN_VERSION=8
@ -163,20 +134,6 @@ case "$image" in
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
-  pytorch-linux-focal-cuda12.4-cudnn8-py3-gcc9)
-    CUDA_VERSION=12.4.0
-    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    CONDA_CMAKE=yes
-    TRITON=yes
-    ;;
  pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
    CUDA_VERSION=12.1.1
    CUDNN_VERSION=8
@ -247,7 +204,7 @@ case "$image" in
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    ROCM_VERSION=6.0
+    ROCM_VERSION=5.7
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
@ -258,7 +215,7 @@ case "$image" in
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    ROCM_VERSION=6.1
+    ROCM_VERSION=6.0
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
@ -269,10 +226,9 @@ case "$image" in
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    XPU_VERSION=0.5
+    BASEKIT_VERSION=2024.0.0-49522
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
-    TRITON=yes
    ;;
    pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks)
    ANACONDA_PYTHON_VERSION=3.8
@ -349,12 +305,6 @@ case "$image" in
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
-    # snadampal: skipping sccache due to the following issue
-    # https://github.com/pytorch/pytorch/issues/121559
-    SKIP_SCCACHE_INSTALL=yes
-    # snadampal: skipping llvm src build install because the current version
-    # from pytorch/llvm:9.0.1 is x86 specific
-    SKIP_LLVM_SRC_BUILD_INSTALL=yes
    ;;
  *)
    # Catch-all for builds that are not hardcoded.
@ -409,7 +359,7 @@ if [[ "$image" == *cuda*  && ${OS} == "ubuntu" ]]; then
 fi

 # Build image
-docker build \
+DOCKER_BUILDKIT=1 docker build \
       --no-cache \
       --progress=plain \
       --build-arg "BUILD_ENVIRONMENT=${image}" \
@ -446,10 +396,8 @@ docker build \
       --build-arg "DOCS=${DOCS}" \
       --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
       --build-arg "EXECUTORCH=${EXECUTORCH}" \
-       --build-arg "XPU_VERSION=${XPU_VERSION}" \
+       --build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
       --build-arg "ACL=${ACL:-}" \
-       --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
-       --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
       -f $(dirname ${DOCKERFILE})/Dockerfile \
       -t "$tmp_tag" \
       "$@" \
--- a/.ci/docker/centos-rocm/Dockerfile
+++ b/.ci/docker/centos-rocm/Dockerfile
@ -62,7 +62,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
 RUN rm install_db.sh
 ENV INSTALLED_DB ${DB}

-# (optional) Install vision packages like OpenCV
+# (optional) Install vision packages like OpenCV and ffmpeg
 ARG VISION
 COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
 RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
--- a/.ci/docker/ci_commit_pins/executorch.txt
+++ b/.ci/docker/ci_commit_pins/executorch.txt
@ -1 +1 @@
-d4b3e5cc607e97afdba79dc90f8ef968142f347c
+e2a8f9548aecb62a68e264607174a7d207ed2929
--- a/.ci/docker/ci_commit_pins/triton-rocm.txt
+++ b/.ci/docker/ci_commit_pins/triton-rocm.txt
@ -1 +1 @@
-bbe6246e37d8aa791c67daaf9d9d61b26c9ccfdc
+0a22a91d04c2b4a029a69a198eac390089c3e891
--- a/.ci/docker/ci_commit_pins/triton-xpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-xpu.txt
@ -1 +0,0 @@
-b8c64f64c18d8cac598b3adb355c21e7439c21de
--- a/.ci/docker/ci_commit_pins/triton.txt
+++ b/.ci/docker/ci_commit_pins/triton.txt
@ -1 +1 @@
-45fff310c891f5a92d55445adf8cc9d29df5841e
+79c6c9b209a5692b9a895398f4f3a033f8f80415
--- a/.ci/docker/common/install_base.sh
+++ b/.ci/docker/common/install_base.sh
@ -113,6 +113,7 @@ install_centos() {
    glibc-devel \
    glibc-headers \
    glog-devel \
+    hiredis-devel \
    libstdc++-devel \
    libsndfile-devel \
    make \
--- a/.ci/docker/common/install_cudnn.sh
+++ b/.ci/docker/common/install_cudnn.sh
@ -4,10 +4,7 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn
    pushd tmp_cudnn
-    if [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
-        CUDNN_NAME="cudnn-linux-x86_64-8.9.7.29_cuda12-archive"
-        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
-    elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
+    if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
    elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
--- a/.ci/docker/common/install_cusparselt.sh
+++ b/.ci/docker/common/install_cusparselt.sh
@ -5,14 +5,9 @@ set -ex
 # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
 mkdir tmp_cusparselt && cd tmp_cusparselt

-if [[ ${CUDA_VERSION:0:4} =~ ^12\.[1-4]$ ]]; then
-    arch_path='sbsa'
-    export TARGETARCH=${TARGETARCH:-$(uname -m)}
-    if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
-        arch_path='x86_64'
-    fi
-    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive"
-    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
+if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
+    CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.5.2.1-archive"
+    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
 elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
    CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
--- a/.ci/docker/common/install_db.sh
+++ b/.ci/docker/common/install_db.sh
@ -4,6 +4,11 @@ set -ex

 install_ubuntu() {
  apt-get update
+  apt-get install -y --no-install-recommends \
+          libhiredis-dev \
+          libleveldb-dev \
+          liblmdb-dev \
+          libsnappy-dev

  # Cleanup
  apt-get autoclean && apt-get clean
@ -15,6 +20,12 @@ install_centos() {
  # See http://fedoraproject.org/wiki/EPEL
  yum --enablerepo=extras install -y epel-release

+  yum install -y \
+      hiredis-devel \
+      leveldb-devel \
+      lmdb-devel \
+      snappy-devel
+
  # Cleanup
  yum clean all
  rm -rf /var/cache/yum
--- a/.ci/docker/common/install_onnx.sh
+++ b/.ci/docker/common/install_onnx.sh
@ -33,12 +33,12 @@ pip_install coloredlogs packaging
 pip_install onnxruntime==1.17.0
 pip_install onnx==1.15.0
 # pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@3e869ef8ccf19b5ebd21c10d3e9c267c9a9fa729" --no-deps
-pip_install onnxscript==0.1.0.dev20240315 --no-deps
+pip_install onnxscript==0.1.0.dev20240301 --no-deps

 # Cache the transformers model to be used later by ONNX tests. We need to run the transformers
 # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
 IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
-as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
+as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2");' > "${IMPORT_SCRIPT_FILENAME}"

 # Need a PyTorch version for transformers to work
 pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
--- a/.ci/docker/common/install_protobuf.sh
+++ b/.ci/docker/common/install_protobuf.sh
@ -11,8 +11,7 @@ mkdir -p $pb_dir
 ln -s /usr/lib64 "$pb_dir/lib64"

 curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
-
-tar -xvz --no-same-owner -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
+tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
 NPROC=$[$(nproc) - 2]
 pushd "$pb_dir" && ./configure && make -j${NPROC} && make -j${NPROC} check && sudo make -j${NRPOC} install && sudo ldconfig
 popd
--- a/.ci/docker/common/install_rocm.sh
+++ b/.ci/docker/common/install_rocm.sh
@ -6,6 +6,9 @@ ver() {
    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
 }

+# Map ROCm version to AMDGPU version
+declare -A AMDGPU_VERSIONS=( ["5.0"]="21.50" ["5.1.1"]="22.10.1" ["5.2"]="22.20" )
+
 install_ubuntu() {
    apt-get update
    if [[ $UBUNTU_VERSION == 18.04 ]]; then
@ -23,14 +26,31 @@ install_ubuntu() {
    apt-get install -y libc++1
    apt-get install -y libc++abi1

-    # Add amdgpu repository
-    UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
-    echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
+    if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
+        # Add amdgpu repository
+        UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
+        local amdgpu_baseurl
+        if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
+        else
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
+        fi
+        echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
+    fi
+
+    ROCM_REPO="ubuntu"
+    if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
+        ROCM_REPO="xenial"
+    fi
+
+    if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
+        ROCM_REPO="${UBUNTU_VERSION_NAME}"
+    fi

    # Add rocm repository
    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
    local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
-    echo "deb [arch=amd64] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/rocm.list
+    echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
    apt-get update --allow-insecure-repositories

    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
@ -41,25 +61,32 @@ install_ubuntu() {
                   rocprofiler-dev \
                   roctracer-dev

-    if [[ $(ver $ROCM_VERSION) -ge $(ver 6.1) ]]; then
-        DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated rocm-llvm-dev
-    fi
-
    # precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
    # search for all unversioned packages
    # if search fails it will abort this script; use true to avoid case where search fails
-    MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
-    if [[ "x${MIOPENHIPGFX}" = x ]]; then
-      echo "miopen-hip-gfx package not available" && exit 1
+    if [[ $(ver $ROCM_VERSION) -ge $(ver 5.5) ]]; then
+        MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
+        if [[ "x${MIOPENHIPGFX}" = x ]]; then
+          echo "miopen-hip-gfx package not available" && exit 1
+        else
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
+        fi
    else
-      DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
+        MIOPENKERNELS=$(apt-cache search --names-only miopenkernels | awk '{print $1}' | grep -F -v . || true)
+        if [[ "x${MIOPENKERNELS}" = x ]]; then
+          echo "miopenkernels package not available" && exit 1
+        else
+          DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENKERNELS}
+        fi
    fi

    # ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
-    for kdb in /opt/rocm/share/miopen/db/*.kdb
-    do
-        sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
-    done
+    if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
+        for kdb in /opt/rocm/share/miopen/db/*.kdb
+        do
+            sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
+        done
+    fi

    # Cleanup
    apt-get autoclean && apt-get clean
@ -76,19 +103,25 @@ install_centos() {
  yum install -y epel-release
  yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`

-  # Add amdgpu repository
-  local amdgpu_baseurl
-  if [[ $OS_VERSION == 9 ]]; then
-      amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.0/main/x86_64"
-  else
-      amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
+  if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
+      # Add amdgpu repository
+      local amdgpu_baseurl
+      if [[ $OS_VERSION == 9 ]]; then
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.0/main/x86_64"
+      else
+        if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
+        else
+          amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
+        fi
+      fi
+      echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
+      echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
+      echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
+      echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
+      echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
+      echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
  fi
-  echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
-  echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
-  echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
-  echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
-  echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
-  echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo

  local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
  echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
@ -110,18 +143,29 @@ install_centos() {

  # precompiled miopen kernels; search for all unversioned packages
  # if search fails it will abort this script; use true to avoid case where search fails
-  MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
-  if [[ "x${MIOPENHIPGFX}" = x ]]; then
-    echo "miopen-hip-gfx package not available" && exit 1
+  if [[ $(ver $ROCM_VERSION) -ge $(ver 5.5) ]]; then
+      MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
+      if [[ "x${MIOPENHIPGFX}" = x ]]; then
+        echo "miopen-hip-gfx package not available" && exit 1
+      else
+        yum install -y ${MIOPENHIPGFX}
+      fi
  else
-    yum install -y ${MIOPENHIPGFX}
+      MIOPENKERNELS=$(yum -q search miopenkernels | grep miopenkernels- | awk '{print $1}'| grep -F kdb. || true)
+      if [[ "x${MIOPENKERNELS}" = x ]]; then
+        echo "miopenkernels package not available" && exit 1
+      else
+        yum install -y ${MIOPENKERNELS}
+      fi
  fi

  # ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
-  for kdb in /opt/rocm/share/miopen/db/*.kdb
-  do
-      sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
-  done
+  if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
+      for kdb in /opt/rocm/share/miopen/db/*.kdb
+      do
+          sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
+      done
+  fi

  # Cleanup
  yum clean all
--- a/.ci/docker/common/install_triton.sh
+++ b/.ci/docker/common/install_triton.sh
@ -13,11 +13,8 @@ conda_reinstall() {
 }

 if [ -n "${ROCM_VERSION}" ]; then
-  TRITON_REPO="https://github.com/openai/triton"
+  TRITON_REPO="https://github.com/ROCmSoftwarePlatform/triton"
  TRITON_TEXT_FILE="triton-rocm"
-elif [ -n "${XPU_VERSION}" ]; then
-  TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
-  TRITON_TEXT_FILE="triton-xpu"
 else
  TRITON_REPO="https://github.com/openai/triton"
  TRITON_TEXT_FILE="triton"
--- a/.ci/docker/common/install_vision.sh
+++ b/.ci/docker/common/install_vision.sh
@ -5,7 +5,8 @@ set -ex
 install_ubuntu() {
  apt-get update
  apt-get install -y --no-install-recommends \
-          libopencv-dev
+          libopencv-dev \
+          libavcodec-dev

  # Cleanup
  apt-get autoclean && apt-get clean
@ -18,7 +19,8 @@ install_centos() {
  yum --enablerepo=extras install -y epel-release

  yum install -y \
-      opencv-devel
+      opencv-devel \
+      ffmpeg-devel

  # Cleanup
  yum clean all
--- a/.ci/docker/common/install_xpu.sh
+++ b/.ci/docker/common/install_xpu.sh
@ -3,7 +3,10 @@ set -xe


 # Intel® software for general purpose GPU capabilities.
-# Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
+# Refer to https://dgpu-docs.intel.com/releases/stable_647_21_20230714.html
+
+# Intel® oneAPI Base Toolkit (version 2024.0.0) has been updated to include functional and security updates.
+# Refer to https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html

 # Users should update to the latest version as it becomes available

@ -14,16 +17,14 @@ function install_ubuntu() {
    # Set up the repository. To do this, download the key to the system keyring
    wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
        | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
-    wget -qO - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
-        | gpg --dearmor --output /usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg
+    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+        | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null

    # Add the signed entry to APT sources and configure the APT client to use the Intel repository
-    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] \
-        https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" \
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/production/2328 unified" \
        | tee /etc/apt/sources.list.d/intel-gpu-jammy.list
-    echo "deb [signed-by=/usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg] \
-        https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" \
-        | tee /etc/apt/sources.list.d/intel-for-pytorch-gpu-dev.list
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
+        | tee /etc/apt/sources.list.d/oneAPI.list

    # Update the packages list and repository index
    apt-get update
@ -39,11 +40,11 @@ function install_ubuntu() {
        mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
    # Development Packages
    apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
-    # Install Intel Support Packages
-    if [ -n "$XPU_VERSION" ]; then
-        apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION}
+    # Install Intel® oneAPI Base Toolkit
+    if [ -n "$BASEKIT_VERSION" ]; then
+        apt-get install intel-basekit=$BASEKIT_VERSION -y
    else
-        apt-get install -y intel-for-pytorch-gpu-dev
+        apt-get install intel-basekit -y
    fi

    # Cleanup
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@ -85,10 +85,10 @@ librosa>=0.6.2 ; python_version < "3.11"
 #Pinned versions:
 #test that import:

-mypy==1.9.0
+mypy==1.8.0
 # Pin MyPy version because new errors are likely to appear with each release
 #Description: linter
-#Pinned versions: 1.9.0
+#Pinned versions: 1.8.0
 #test that import: test_typing.py, test_type_hints.py

 networkx==2.8.8
@ -134,9 +134,9 @@ opt-einsum==3.3
 #Pinned versions: 3.3
 #test that import: test_linalg.py

-optree==0.11.0
+optree==0.9.1
 #Description: A library for tree manipulation
-#Pinned versions: 0.11.0
+#Pinned versions: 0.9.1
 #test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
 #test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
 #common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
@ -147,9 +147,9 @@ optree==0.11.0
 #test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
 #test_fake_tensor.py, test_mps.py

-pillow==10.3.0
+pillow==10.2.0
 #Description:  Python Imaging Library fork
-#Pinned versions: 10.3.0
+#Pinned versions: 10.2.0
 #test that import:

 protobuf==3.20.2
@ -228,11 +228,12 @@ scikit-image==0.20.0 ; python_version >= "3.10"
 #Pinned versions: 0.20.3
 #test that import:

-scipy==1.10.1 ; python_version <= "3.11"
-scipy==1.12.0 ; python_version == "3.12"
+scipy==1.6.3 ; python_version < "3.10"
+scipy==1.8.1 ; python_version == "3.10"
+scipy==1.10.1 ; python_version == "3.11"
 # Pin SciPy because of failing distribution tests (see #60347)
 #Description: scientific python
-#Pinned versions: 1.10.1
+#Pinned versions: 1.6.3
 #test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
 #test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
 #test_linalg.py, test_binary_ufuncs.py
@ -263,10 +264,10 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
 #Pinned versions:
 #test that import:

-#lintrunner is supported on aarch64-linux only from 0.12.4 version
-lintrunner==0.12.5
+#wheel not found on aarch64, and source build requires rust
+lintrunner==0.10.7 ; platform_machine == "x86_64"
 #Description: all about linters!
-#Pinned versions: 0.12.5
+#Pinned versions: 0.10.7
 #test that import:

 rockset==1.0.3
@ -279,9 +280,9 @@ ghstack==0.8.0
 #Pinned versions: 0.8.0
 #test that import:

-jinja2==3.1.4
+jinja2==3.1.3
 #Description: jinja2 template engine
-#Pinned versions: 3.1.4
+#Pinned versions: 3.1.3
 #test that import:

 pytest-cpp==2.3.0
@ -310,5 +311,3 @@ lxml==5.0.0.
 #Description: This is a requirement of unittest-xml-reporting

 # Python-3.9 binaries
-
-PyGithub==2.3.0
--- a/.ci/docker/triton_version.txt
+++ b/.ci/docker/triton_version.txt
@ -1 +1 @@
-3.0.0
+2.3.0
--- a/.ci/docker/ubuntu-cuda/Dockerfile
+++ b/.ci/docker/ubuntu-cuda/Dockerfile
@ -56,7 +56,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
 RUN rm install_db.sh
 ENV INSTALLED_DB ${DB}

-# (optional) Install vision packages like OpenCV
+# (optional) Install vision packages like OpenCV and ffmpeg
 ARG VISION
 COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
 RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
@ -152,7 +152,6 @@ RUN rm install_cusparselt.sh
 RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
 RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
 RUN if [ -h /usr/local/cuda-12.1/cuda-12.1 ]; then rm /usr/local/cuda-12.1/cuda-12.1; fi
-RUN if [ -h /usr/local/cuda-12.1/cuda-12.4 ]; then rm /usr/local/cuda-12.1/cuda-12.4; fi

 USER jenkins
 CMD ["bash"]
--- a/.ci/docker/ubuntu-rocm/Dockerfile
+++ b/.ci/docker/ubuntu-rocm/Dockerfile
@ -53,7 +53,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
 RUN rm install_db.sh
 ENV INSTALLED_DB ${DB}

-# (optional) Install vision packages like OpenCV
+# (optional) Install vision packages like OpenCV and ffmpeg
 ARG VISION
 COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
 RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
--- a/.ci/docker/ubuntu-xpu/Dockerfile
+++ b/.ci/docker/ubuntu-xpu/Dockerfile
@ -61,20 +61,15 @@ COPY ci_commit_pins/timm.txt timm.txt
 RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
 RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt

-# Install XPU Dependencies
-ARG XPU_VERSION
-COPY ./common/install_xpu.sh install_xpu.sh
-RUN bash ./install_xpu.sh && rm install_xpu.sh
-
 ARG TRITON
 # Install triton, this needs to be done before sccache because the latter will
 # try to reach out to S3, which docker build runners don't have access
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton-xpu.txt triton-xpu.txt
-COPY triton_version.txt triton_version.txt
+# TODO: will add triton xpu commit
+COPY ci_commit_pins/triton.txt triton.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton-xpu.txt triton_version.txt
+RUN rm install_triton.sh common_utils.sh triton.txt

 # (optional) Install database packages like LMDB and LevelDB
 ARG DB
@ -83,13 +78,18 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
 RUN rm install_db.sh
 ENV INSTALLED_DB ${DB}

-# (optional) Install vision packages like OpenCV
+# (optional) Install vision packages like OpenCV and ffmpeg
 ARG VISION
 COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
 RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
 RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
 ENV INSTALLED_VISION ${VISION}

+# Install XPU Dependencies
+ARG BASEKIT_VERSION
+COPY ./common/install_xpu.sh install_xpu.sh
+RUN bash ./install_xpu.sh && rm install_xpu.sh
+
 # (optional) Install non-default CMake version
 ARG CMAKE_VERSION
 COPY ./common/install_cmake.sh install_cmake.sh
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@ -80,7 +80,7 @@ RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
 RUN rm install_db.sh
 ENV INSTALLED_DB ${DB}

-# (optional) Install vision packages like OpenCV
+# (optional) Install vision packages like OpenCV and ffmpeg
 ARG VISION
 COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
 RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
@ -169,11 +169,9 @@ RUN rm install_acl.sh
 ENV INSTALLED_ACL ${ACL}

 # Install ccache/sccache (do this last, so we get priority in PATH)
-ARG SKIP_SCCACHE_INSTALL
 COPY ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
-RUN if [ -z "${SKIP_SCCACHE_INSTALL}" ]; then bash ./install_cache.sh; fi
-RUN rm install_cache.sh
+RUN bash ./install_cache.sh && rm install_cache.sh

 # Add jni.h for java host build
 COPY ./common/install_jni.sh install_jni.sh
@ -190,9 +188,7 @@ ARG BUILD_ENVIRONMENT
 ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

 # Install LLVM dev version (Defined in the pytorch/builder github repository)
-ARG SKIP_LLVM_SRC_BUILD_INSTALL
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
-RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi

 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
--- a/.ci/onnx/common.sh
+++ b/.ci/onnx/common.sh
@ -1,9 +1,5 @@
-#!/bin/bash
-
 set -ex

-source "$(dirname "${BASH_SOURCE[0]}")/../pytorch/common_utils.sh"
-
 LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
 TEST_DIR="$ROOT_DIR/test"
--- a/.ci/onnx/test.sh
+++ b/.ci/onnx/test.sh
@ -3,20 +3,6 @@
 # shellcheck source=./common.sh
 source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

-# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
-WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
-cleanup_workspace() {
-  echo "sudo may print the following warning message that can be ignored. The chown command will still run."
-  echo "    sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
-  echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
-  sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
-}
-# Disable shellcheck SC2064 as we want to parse the original owner immediately.
-# shellcheck disable=SC2064
-trap_add cleanup_workspace EXIT
-sudo chown -R jenkins /var/lib/jenkins/workspace
-git config --global --add safe.directory /var/lib/jenkins/workspace
-
 if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
  # TODO: This can be removed later once vision is also part of the Docker image
  pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@ -44,6 +44,11 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
  fi
 fi

+if [[ ${BUILD_ENVIRONMENT} == *"caffe2"* ]]; then
+  echo "Caffe2 build is ON"
+  export BUILD_CAFFE2=ON
+fi
+
 if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
  export ATEN_THREADING=TBB
  export USE_TBB=1
@ -76,22 +81,7 @@ if ! which conda; then
    export USE_MKLDNN=0
  fi
 else
-  # CMAKE_PREFIX_PATH precedences
-  # 1. $CONDA_PREFIX, if defined. This follows the pytorch official build instructions.
-  # 2. /opt/conda/envs/py_${ANACONDA_PYTHON_VERSION}, if ANACONDA_PYTHON_VERSION defined.
-  #    This is for CI, which defines ANACONDA_PYTHON_VERSION but not CONDA_PREFIX.
-  # 3. $(conda info --base). The fallback value of pytorch official build
-  #    instructions actually refers to this.
-  #    Commonly this is /opt/conda/
-  if [[ -v CONDA_PREFIX ]]; then
-    export CMAKE_PREFIX_PATH=${CONDA_PREFIX}
-  elif [[ -v ANACONDA_PYTHON_VERSION ]]; then
-    export CMAKE_PREFIX_PATH="/opt/conda/envs/py_${ANACONDA_PYTHON_VERSION}"
-  else
-    # already checked by `! which conda`
-    CMAKE_PREFIX_PATH="$(conda info --base)"
-    export CMAKE_PREFIX_PATH
-  fi
+  export CMAKE_PREFIX_PATH=/opt/conda

  # Workaround required for MKL library linkage
  # https://github.com/pytorch/pytorch/issues/119557
@ -233,24 +223,6 @@ if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]
  export BUILD_STATIC_RUNTIME_BENCHMARK=ON
 fi

-# Do not change workspace permissions for ROCm CI jobs
-# as it can leave workspace with bad permissions for cancelled jobs
-if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
-  # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
-  WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
-  cleanup_workspace() {
-    echo "sudo may print the following warning message that can be ignored. The chown command will still run."
-    echo "    sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
-    echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
-    sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
-  }
-  # Disable shellcheck SC2064 as we want to parse the original owner immediately.
-  # shellcheck disable=SC2064
-  trap_add cleanup_workspace EXIT
-  sudo chown -R jenkins /var/lib/jenkins/workspace
-  git config --global --add safe.directory /var/lib/jenkins/workspace
-fi
-
 if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
  set -e

@ -276,17 +248,13 @@ else
  ( ! get_exit_code python setup.py clean bad_argument )

  if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
+
    # rocm builds fail when WERROR=1
    # XLA test build fails when WERROR=1
    # set only when building other architectures
    # or building non-XLA tests.
    if [[ "$BUILD_ENVIRONMENT" != *rocm*  &&
          "$BUILD_ENVIRONMENT" != *xla* ]]; then
-      if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
-        # Install numpy-2.0 release candidate for builds
-        # Which should be backward compatible with Numpy-1.X
-        python -mpip install --pre numpy==2.0.0rc1
-      fi
      WERROR=1 python setup.py bdist_wheel
    else
      python setup.py bdist_wheel
@ -386,8 +354,4 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
  python tools/stats/export_test_times.py
 fi

-# snadampal: skipping it till sccache support added for aarch64
-# https://github.com/pytorch/pytorch/issues/121559
-if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
-  print_sccache_stats
-fi
+print_sccache_stats
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@ -159,7 +159,7 @@ function install_torchvision() {
 }

 function install_tlparse() {
-  pip_install --user "tlparse==0.3.7"
+  pip_install --user "tlparse==0.3.5"
  PATH="$(python -m site --user-base)/bin:$PATH"
 }

@ -178,7 +178,7 @@ function install_torchrec_and_fbgemm() {

 function clone_pytorch_xla() {
  if [[ ! -d ./xla ]]; then
-    git clone --recursive --quiet https://github.com/pytorch/xla.git
+    git clone --recursive -b r2.3 https://github.com/pytorch/xla.git
    pushd xla
    # pin the xla hash so that we don't get broken by changes to xla
    git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
--- a/.ci/pytorch/docs-test.sh
+++ b/.ci/pytorch/docs-test.sh
@ -6,4 +6,4 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
 echo "Testing pytorch docs"

 cd docs
-TERM=vt100 make doctest
+make doctest
--- a/.ci/pytorch/multigpu-test.sh
+++ b/.ci/pytorch/multigpu-test.sh
@ -45,10 +45,6 @@ time python test/run_test.py --verbose -i distributed/test_device_mesh
 time python test/run_test.py --verbose -i distributed/tensor/parallel/test_ddp_2d_parallel
 time python test/run_test.py --verbose -i distributed/tensor/parallel/test_fsdp_2d_parallel
 time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
-time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state
-
-# FSDP2 tests
-time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh

 # Other tests
 time python test/run_test.py --verbose -i test_cuda_primary_ctx
--- a/.ci/pytorch/perf_test/compare_with_baseline.py
+++ b/.ci/pytorch/perf_test/compare_with_baseline.py
@ -59,16 +59,16 @@ print("sample mean: ", sample_mean)
 print("sample sigma: ", sample_sigma)

 if math.isnan(sample_mean):
-    raise Exception("""Error: sample mean is NaN""")  # noqa: TRY002
+    raise Exception("""Error: sample mean is NaN""")
 elif math.isnan(sample_sigma):
-    raise Exception("""Error: sample sigma is NaN""")  # noqa: TRY002
+    raise Exception("""Error: sample sigma is NaN""")

 z_value = (sample_mean - mean) / sigma

 print("z-value: ", z_value)

 if z_value >= 3:
-    raise Exception(  # noqa: TRY002
+    raise Exception(
        f"""\n
 z-value >= 3, there is high chance of perf regression.\n
 To reproduce this regression, run
--- a/.ci/pytorch/python_doc_push_script.sh
+++ b/.ci/pytorch/python_doc_push_script.sh
@ -26,8 +26,8 @@ echo "error: python_doc_push_script.sh: version (arg2) not specified"
 fi

 # Argument 1: Where to copy the built documentation to
-# (pytorch_docs/$install_path)
-install_path="${1:-${DOCS_INSTALL_PATH:-${DOCS_VERSION}}}"
+# (pytorch.github.io/$install_path)
+install_path="${1:-${DOCS_INSTALL_PATH:-docs/${DOCS_VERSION}}}"
 if [ -z "$install_path" ]; then
 echo "error: python_doc_push_script.sh: install_path (arg1) not specified"
  exit 1
@ -68,8 +68,8 @@ build_docs () {
 }


-git clone https://github.com/pytorch/docs pytorch_docs -b "$branch" --depth 1
-pushd pytorch_docs
+git clone https://github.com/pytorch/pytorch.github.io -b "$branch" --depth 1
+pushd pytorch.github.io

 export LC_ALL=C
 export PATH=/opt/conda/bin:$PATH
@ -105,7 +105,6 @@ if [ "$is_main_doc" = true ]; then
    echo undocumented objects found:
    cat build/coverage/python.txt
    echo "Make sure you've updated relevant .rsts in docs/source!"
-    echo "You can reproduce locally by running 'cd docs && make coverage && cat build/coverage/python.txt'"
    exit 1
  fi
 else
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@ -6,27 +6,6 @@

 set -ex

-# shellcheck source=./common.sh
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-# Do not change workspace permissions for ROCm CI jobs
-# as it can leave workspace with bad permissions for cancelled jobs
-if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
-  # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
-  WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
-  cleanup_workspace() {
-    echo "sudo may print the following warning message that can be ignored. The chown command will still run."
-    echo "    sudo: setrlimit(RLIMIT_STACK): Operation not permitted"
-    echo "For more details refer to https://github.com/sudo-project/sudo/issues/42"
-    sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
-  }
-  # Disable shellcheck SC2064 as we want to parse the original owner immediately.
-  # shellcheck disable=SC2064
-  trap_add cleanup_workspace EXIT
-  sudo chown -R jenkins /var/lib/jenkins/workspace
-  git config --global --add safe.directory /var/lib/jenkins/workspace
-fi
-
 echo "Environment variables:"
 env

@ -111,6 +90,9 @@ if [[ -n $TESTS_TO_INCLUDE ]]; then
  INCLUDE_CLAUSE="--include $TESTS_TO_INCLUDE"
 fi

+# shellcheck source=./common.sh
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
 echo "Environment variables"
 env

@ -181,11 +163,6 @@ if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
  export PATH="$HOME/.local/bin:$PATH"
 fi

-if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
-  # TODO: revisit this once the CI is stabilized on aarch64 linux
-  export VALGRIND=OFF
-fi
-
 install_tlparse

 # DANGER WILL ROBINSON.  The LD_PRELOAD here could cause you problems
@ -234,6 +211,8 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
    export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so
    # Disable valgrind for asan
    export VALGRIND=OFF
+    # Increase stack size, because ASAN red zones use more stack
+    ulimit -s 81920

    (cd test && python -c "import torch; print(torch.__version__, torch.version.git_version)")
    echo "The next four invocations are expected to crash; if they don't that means ASAN/UBSAN is misconfigured"
@ -310,23 +289,19 @@ test_dynamo_shard() {
 test_inductor_distributed() {
  # Smuggle a few multi-gpu tests here so that we don't have to request another large node
  echo "Testing multi_gpu tests in test_torchinductor"
-  python test/run_test.py -i inductor/test_torchinductor.py -k test_multi_gpu --verbose
-  python test/run_test.py -i inductor/test_aot_inductor.py -k test_non_default_cuda_device --verbose
-  python test/run_test.py -i inductor/test_aot_inductor.py -k test_replicate_on_devices --verbose
-  python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
-  python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
-  python test/run_test.py -i distributed/tensor/parallel/test_fsdp_2d_parallel.py --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_mlp --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_hsdp --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_transformer_checkpoint_resume --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_gradient_accumulation --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_frozen.py --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
-  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
-  python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose
+  pytest test/inductor/test_torchinductor.py -k test_multi_gpu
+  pytest test/inductor/test_aot_inductor.py -k test_non_default_cuda_device
+  pytest test/inductor/test_aot_inductor.py -k test_replicate_on_devices
+  pytest test/distributed/test_c10d_functional_native.py
+  pytest test/distributed/_tensor/test_dtensor_compile.py
+  pytest test/distributed/tensor/parallel/test_fsdp_2d_parallel.py
+  pytest test/distributed/_composable/fsdp/test_fully_shard_comm.py
+  pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group
+  pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing
+  pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_2d_mlp
+  pytest test/distributed/_composable/fsdp/test_fully_shard_frozen.py
+  pytest test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype
+  pytest test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype

  # this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
  # with if required # gpus aren't available
@ -338,13 +313,13 @@ test_inductor() {
  python tools/dynamo/verify_dynamo.py
  python test/run_test.py --inductor --include test_modules test_ops test_ops_gradients test_torch --verbose
  # Do not add --inductor for the following inductor unit tests, otherwise we will fail because of nested dynamo state
-  python test/run_test.py --include inductor/test_torchinductor inductor/test_torchinductor_opinfo inductor/test_aot_inductor --verbose
+  python test/run_test.py --include inductor/test_torchinductor inductor/test_torchinductor_opinfo --verbose

  # docker build uses bdist_wheel which does not work with test_aot_inductor
  # TODO: need a faster way to build
  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
      BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
-      CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
+      CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
  fi
 }

@ -457,17 +432,6 @@ test_perf_for_dashboard() {
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_max_autotune_${suite}_${dtype}_${mode}_cuda_${target}.csv"
      fi
-      if [[ "$DASHBOARD_TAG" == *cudagraphs_low_precision-true* ]] && [[ "$mode" == "inference" ]]; then
-        # TODO: This has a new dtype called quant and the benchmarks script needs to be updated to support this.
-        # The tentative command is as follows. It doesn't work now, but it's ok because we only need mock data
-        # to fill the dashboard.
-        python "benchmarks/dynamo/$suite.py" \
-          "${target_flag[@]}" --"$mode" --quant --backend "$backend" "$@" \
-          --output "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_cuda_${target}.csv" || true
-        # Copy cudagraph results as mock data, easiest choice?
-        cp "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_${mode}_cuda_${target}.csv" \
-          "$TEST_REPORTS_DIR/${backend}_cudagraphs_low_precision_${suite}_quant_${mode}_cuda_${target}.csv"
-      fi
    done
  done
 }
@ -522,11 +486,6 @@ test_single_dynamo_benchmark() {
  fi
 }

-test_inductor_micro_benchmark() {
-  TEST_REPORTS_DIR=$(pwd)/test/test-reports
-  python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
-}
-
 test_dynamo_benchmark() {
  # Usage: test_dynamo_benchmark huggingface 0
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
@ -588,15 +547,6 @@ test_inductor_torchbench_smoketest_perf() {
      "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv" \
      --expected benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv
  done
-
-  # Perform some "warm-start" runs for a few huggingface models.
-  for test in AlbertForQuestionAnswering AllenaiLongformerBase DistilBertForMaskedLM DistillGPT2 GoogleFnet YituTechConvBert; do
-    python benchmarks/dynamo/huggingface.py --accuracy --training --amp --inductor --device cuda --warm-start-latency \
-      --only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
-    python benchmarks/dynamo/check_accuracy.py \
-      --actual "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" \
-      --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv"
-  done
 }

 test_inductor_torchbench_cpu_smoketest_perf(){
@ -643,12 +593,6 @@ test_inductor_torchbench_cpu_smoketest_perf(){
  done
 }

-test_torchbench_gcp_smoketest(){
-  pushd "${TORCHBENCHPATH}"
-  python test.py -v
-  popd
-}
-
 test_python_gloo_with_tls() {
  source "$(dirname "${BASH_SOURCE[0]}")/run_glootls_test.sh"
  assert_git_not_dirty
@ -1172,33 +1116,11 @@ test_executorch() {
  assert_git_not_dirty
 }

-test_linux_aarch64(){
-  python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
-       test_transformers test_multiprocessing test_numpy_interop --verbose
-
-  # Dynamo tests
-  python test/run_test.py --include dynamo/test_compile dynamo/test_backends dynamo/test_comptime dynamo/test_config \
-       dynamo/test_functions dynamo/test_fx_passes_pre_grad dynamo/test_interop dynamo/test_model_output dynamo/test_modules \
-       dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles --verbose
-
-  # Inductor tests
-  python test/run_test.py --include inductor/test_torchinductor inductor/test_benchmark_fusion inductor/test_codecache \
-       inductor/test_config inductor/test_control_flow inductor/test_coordinate_descent_tuner inductor/test_fx_fusion \
-       inductor/test_group_batch_fusion inductor/test_inductor_freezing inductor/test_inductor_utils \
-       inductor/test_inplacing_pass inductor/test_kernel_benchmark inductor/test_layout_optim \
-       inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \
-       inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
-       inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
-       inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes --verbose
-}
-
 if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
  (cd test && python -c "import torch; print(torch.__config__.show())")
  (cd test && python -c "import torch; print(torch.__config__.parallel_info())")
 fi
-if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
-  test_linux_aarch64
-elif [[ "${TEST_CONFIG}" == *backward* ]]; then
+if [[ "${TEST_CONFIG}" == *backward* ]]; then
  test_forward_backward_compatibility
  # Do NOT add tests after bc check tests, see its comment.
 elif [[ "${TEST_CONFIG}" == *xla* ]]; then
@ -1223,8 +1145,6 @@ elif [[ "$TEST_CONFIG" == deploy ]]; then
  test_torch_deploy
 elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
  test_inductor_distributed
-elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then
-  test_inductor_micro_benchmark
 elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then
  install_torchvision
  id=$((SHARD_NUMBER-1))
@ -1252,9 +1172,6 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
      llama_v2_7b_16h resnet50 timm_efficientnet mobilenet_v3_large timm_resnest \
      shufflenet_v2_x1_0 hf_GPT2
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_cpu_smoketest_perf
-  elif [[ "${TEST_CONFIG}" == *torchbench_gcp_smoketest* ]]; then
-    checkout_install_torchbench
-    TORCHBENCHPATH=$(pwd)/torchbench test_torchbench_gcp_smoketest
  else
    checkout_install_torchbench
    # Do this after checkout_install_torchbench to ensure we clobber any
@ -1278,10 +1195,6 @@ elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHAR
 elif [[ "${TEST_CONFIG}" == *dynamo* && $SHARD_NUMBER -gt 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
  install_torchvision
  test_dynamo_shard "${SHARD_NUMBER}"
-elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
-  install_torchvision
-  test_python_shard "$SHARD_NUMBER"
-  test_aten
 elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
  test_without_numpy
  install_torchvision
@ -1311,6 +1224,10 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then
  test_libtorch
 elif [[ "${TEST_CONFIG}" = docs_test ]]; then
  test_docs_test
+elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
+  install_torchvision
+  test_python
+  test_aten
 elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
  install_torchvision
  test_python
--- a/.ci/pytorch/win-test-helpers/build_pytorch.bat
+++ b/.ci/pytorch/win-test-helpers/build_pytorch.bat
@ -17,22 +17,22 @@ set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocol
 set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers

 call %INSTALLER_DIR%\install_magma.bat
-if errorlevel 1 goto fail
-if not errorlevel 0 goto fail
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b

 call %INSTALLER_DIR%\install_sccache.bat
-if errorlevel 1 goto fail
-if not errorlevel 0 goto fail
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b

 :: Miniconda has been installed as part of the Windows AMI with all the dependencies.
 :: We just need to activate it here
 call %INSTALLER_DIR%\activate_miniconda3.bat
-if errorlevel 1 goto fail
-if not errorlevel 0 goto fail
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b

 call pip install mkl-include==2021.4.0 mkl-devel==2021.4.0
-if errorlevel 1 goto fail
-if not errorlevel 0 goto fail
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b

 :: Override VS env here
 pushd .
@ -41,8 +41,8 @@ if "%VC_VERSION%" == "" (
 ) else (
    call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
 )
-if errorlevel 1 goto fail
-if not errorlevel 0 goto fail
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
@echo on
 popd

@ -52,12 +52,12 @@ set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%

 if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
    echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
-    goto fail
+    exit /b 1
 )
 rem version transformer, for example 10.1 to 10_1.
 if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
    echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
-    goto fail
+    exit /b 1
 )
 set VERSION_SUFFIX=%CUDA_VERSION:.=_%
 set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
@ -101,8 +101,8 @@ if "%USE_CUDA%"=="1" (
  :: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
  :: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
  curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
-  if errorlevel 1 goto fail
-  if not errorlevel 0 goto fail
+  if errorlevel 1 exit /b
+  if not errorlevel 0 exit /b
  echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
  cat %TMP_DIR%/bin/nvcc.bat
  set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
@ -114,8 +114,8 @@ if "%USE_CUDA%"=="1" (
 set

 python setup.py bdist_wheel
-if errorlevel 1 goto fail
-if not errorlevel 0 goto fail
+if errorlevel 1 exit /b
+if not errorlevel 0 exit /b
 sccache --show-stats
 python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
 (
@ -135,8 +135,3 @@ python -c "import os, glob; os.system('python -mpip install --no-index --no-deps

 sccache --show-stats --stats-format json | jq .stats > sccache-stats-%BUILD_ENVIRONMENT%-%OUR_GITHUB_JOB_ID%.json
 sccache --stop-server
-
-exit /b 0
-
-:fail
-exit /b 1
--- a/.circleci/scripts/binary_linux_test.sh
+++ b/.circleci/scripts/binary_linux_test.sh
@ -96,13 +96,8 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
    conda install \${EXTRA_CONDA_FLAGS} -y "\$pkg" --offline
  )
 elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
-  if [[ "\$BUILD_ENVIRONMENT" != *s390x* ]]; then
-    pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
-    retry pip install -q numpy protobuf typing-extensions
-  else
-    pip install "\$pkg"
-    retry pip install -q numpy protobuf typing-extensions
-  fi
+  pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
+  retry pip install -q numpy protobuf typing-extensions
 fi
 if [[ "$PACKAGE_TYPE" == libtorch ]]; then
  pkg="\$(ls /final_pkgs/*-latest.zip)"
--- a/.clang-tidy
+++ b/.clang-tidy
@ -36,7 +36,6 @@ hicpp-exception-baseclass,
 hicpp-avoid-goto,
 misc-*,
 -misc-const-correctness,
-misc-include-cleaner,
 -misc-use-anonymous-namespace,
 -misc-unused-parameters,
 -misc-no-recursion,
@ -61,5 +60,6 @@ readability-simplify-subscript-expr,
 readability-string-compare,
 '
 HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
+AnalyzeTemporaryDtors: false
 WarningsAsErrors: '*'
 ...
--- a/.flake8
+++ b/.flake8
@ -54,7 +54,6 @@ per-file-ignores =
    torch/ao/quantization/fx/_decomposed.py: TOR901
    torch/distributed/_functional_collectives.py: TOR901
    torch/distributed/_spmd/data_parallel.py: TOR901
-    torch/distributed/_tensor/_collective_utils.py: TOR901
 optional-ascii-coding = True
 exclude =
    ./.git,
--- a/.gitattributes
+++ b/.gitattributes
@ -4,4 +4,3 @@
 .github/generated-* linguist-generated=true
 .github/scripts/gql_mocks.json linguist-generated=true
 third_party/LICENSES_BUNDLED.txt linguist-generated=true
-tools/build/bazel/requirements.txt linguist-generated=true
--- a/.github/ISSUE_TEMPLATE/pt2-bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/pt2-bug-report.yml
@ -8,18 +8,7 @@ body:
      value: >
        #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the
        existing and past issues](https://github.com/pytorch/pytorch/issues)
-        It's likely that your bug will be resolved by checking our FAQ or troubleshooting guide [documentation](https://pytorch.org/docs/main/dynamo/index.html)
-
-        Note: if you're submitting an issue that you generated from a fuzzer. Please do the following:
-
-        - Ensure rtol/atol are at default tolerances
-
-        - Dont compare indices of max/min etc, because that avoids the above requirement
-
-        - If comparing eager and torch.compile at fp16/bf16, you should use fp32 as baseline
-
-        If the above requirements are met, add the label "topic: fuzzer" to your issue.
-
+        It's likely that your bug will be resolved by checking our FAQ or troubleshooting guide [documentation](https://pytorch.org/docs/master/dynamo/index.html)
  - type: textarea
    attributes:
      label: 🐛 Describe the bug
@ -44,7 +33,7 @@ body:
      label: Minified repro
      description: |
        Please run the minifier on your example and paste the minified code below
-        Learn more here https://pytorch.org/docs/main/torch.compiler_troubleshooting.html
+        Learn more here https://pytorch.org/docs/master/compile/troubleshooting.html
      placeholder: |
        env TORCHDYNAMO_REPRO_AFTER="aot" python your_model.py
        or
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -13,7 +13,6 @@ self-hosted-runner:
    - linux.8xlarge.nvidia.gpu
    - linux.16xlarge.nvidia.gpu
    - linux.g5.4xlarge.nvidia.gpu
-    - linux.s390x
    - windows.4xlarge.nonephemeral
    - windows.8xlarge.nvidia.gpu
    - windows.8xlarge.nvidia.gpu.nonephemeral
@ -22,7 +21,6 @@ self-hosted-runner:
    - linux.rocm.gpu
    - macos-m1-stable
    - macos-m1-13
-    - macos-m1-14
    - macos-12-xl
    - macos-12
    - macos12.3-m1
--- a/.github/actions/download-build-artifacts/action.yml
+++ b/.github/actions/download-build-artifacts/action.yml
@ -9,10 +9,6 @@ inputs:
  use-gha:
    description: If set to any value, use GHA to download the artifact. Otherwise use s3.
    required: false
-  s3-bucket:
-    description: S3 bucket to download builds
-    required: false
-    default: "gha-artifacts"

 runs:
  using: composite
@ -22,10 +18,9 @@ runs:
      uses: seemethere/download-artifact-s3@v4
      with:
        name: ${{ inputs.name }}
-        s3-bucket: ${{ inputs.s3-bucket }}

    - name: Download PyTorch Build Artifacts from GHA
-      if: ${{ inputs.use-gha }}
+      if: inputs.use-gha
      uses: actions/download-artifact@v3
      with:
        name: ${{ inputs.name }}
@ -34,10 +29,6 @@ runs:
      shell: bash
      run: unzip -o artifacts.zip

-    - name: Remove artifacts.zip
-      shell: bash
-      run: rm artifacts.zip
-
    - name: Output disk space left
      shell: bash
      run: df -H
--- a/.github/actions/filter-test-configs/action.yml
+++ b/.github/actions/filter-test-configs/action.yml
@ -13,13 +13,6 @@ inputs:
    required: true
    type: string
    description: JSON description of what test configs to run.
-  selected-test-configs:
-    required: false
-    type: string
-    description: |
-      A comma-separated list of test configurations from the test matrix to keep,
-      The empty list means we are going to keep every configurations by defaults
-    default: ""
  job-name:
    type: string
    required: false
@ -47,9 +40,6 @@ outputs:
  ci-no-td:
    description: True if ci-no-td label was on PR or [ci-no-td] in PR body.
    value: ${{ steps.filter.outputs.ci-no-td }}
-  ci-td-distributed:
-    description: True if ci-td-distributed label was on PR or [ci-td-distributed] in PR body.
-    value: ${{ steps.filter.outputs.ci-td-distributed }}

 runs:
  using: composite
@ -133,7 +123,6 @@ runs:
          --workflow "${GITHUB_WORKFLOW}" \
          --job-name "${JOB_NAME}" \
          --test-matrix "${{ inputs.test-matrix }}" \
-          --selected-test-configs "${{ inputs.selected-test-configs }}" \
          --pr-number "${PR_NUMBER}" \
          --tag "${TAG}" \
          --event-name "${EVENT_NAME}" \
--- a/.github/actions/linux-build/action.yml
+++ b/.github/actions/linux-build/action.yml
@ -1,207 +0,0 @@
-name: linux-build
-
-inputs:
-  build-environment:
-    required: true
-    description: Top-level label for what's being built/tested.
-  docker-image-name:
-    required: true
-    description: Name of the base docker image to build with.
-  build-generates-artifacts:
-    required: false
-    default: "true"
-    description: If set, upload generated build artifacts.
-  build-with-debug:
-    required: false
-    default: "false"
-    description: If set, build in debug mode.
-  sync-tag:
-    required: false
-    default: ""
-    description: |
-      If this is set, our linter will use this to make sure that every other
-      job with the same `sync-tag` is identical.
-  cuda-arch-list:
-    required: false
-    default: "5.2"
-    description: Runner label to select worker type
-  runner:
-    required: false
-    default: "linux.2xlarge"
-    description: |
-      List of CUDA architectures CI build should target.
-  test-matrix:
-    required: false
-    type: string
-    description: |
-      An option JSON description of what test configs to run later on. This
-      is moved here from the Linux test workflow so that we can apply filter
-      logic using test-config labels earlier and skip unnecessary builds
-  s3-bucket:
-    description: S3 bucket to download artifact
-    required: false
-    default: "gha-artifacts"
-  aws-role-to-assume:
-    description: role to assume for downloading artifacts
-    required: false
-    default: ""
-  GITHUB_TOKEN:
-    description: GitHub token
-    required: true
-  HUGGING_FACE_HUB_TOKEN:
-    description: Hugging Face Hub token
-    required: false
-    default: ""
-outputs:
-  docker-image:
-    value: ${{ steps.calculate-docker-image.outputs.docker-image }}
-    description: The docker image containing the built PyTorch.
-  test-matrix:
-    value: ${{ steps.filter.outputs.test-matrix }}
-    description: An optional JSON description of what test configs to run later on.
-
-runs:
-  using: composite
-  steps:
-    - name: Setup Linux
-      uses: ./.github/actions/setup-linux
-
-    - name: configure aws credentials
-      uses: aws-actions/configure-aws-credentials@v3
-      if: ${{ inputs.aws-role-to-assume != '' }}
-      with:
-        role-to-assume: ${{ inputs.aws-role-to-assume }}
-        role-session-name: gha-linux-build
-        role-duration-seconds: 10800
-        aws-region: us-east-1
-
-    - name: Calculate docker image
-      id: calculate-docker-image
-      uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-      with:
-        docker-image-name: ${{ inputs.docker-image-name }}
-
-    - name: Use following to pull public copy of the image
-      id: print-ghcr-mirror
-      env:
-        ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
-      shell: bash
-      run: |
-        tag=${ECR_DOCKER_IMAGE##*/}
-        echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
-
-    - name: Pull docker image
-      uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-      with:
-        docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
-
-    - name: Parse ref
-      id: parse-ref
-      shell: bash
-      run: .github/scripts/parse_ref.py
-
-    - name: Get workflow job id
-      id: get-job-id
-      uses: ./.github/actions/get-workflow-job-id
-      if: always()
-      with:
-        github-token: ${{ inputs.GITHUB_TOKEN }}
-
-    # Apply the filter logic to the build step too if the test-config label is already there
-    - name: Select all requested test configurations (if the test matrix is available)
-      id: filter
-      uses: ./.github/actions/filter-test-configs
-      with:
-        github-token: ${{ inputs.GITHUB_TOKEN }}
-        test-matrix: ${{ inputs.test-matrix }}
-        job-name: ${{ steps.get-job-id.outputs.job-name }}
-
-    - name: Download pytest cache
-      uses: ./.github/actions/pytest-cache-download
-      continue-on-error: true
-      with:
-        cache_dir: .pytest_cache
-        job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
-        s3_bucket: ${{ inputs.s3-bucket }}
-
-    - name: Build
-      if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
-      id: build
-      env:
-        BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
-        BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        # TODO duplicated
-        AWS_DEFAULT_REGION: us-east-1
-        PR_NUMBER: ${{ github.event.pull_request.number }}
-        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-        SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
-        SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
-        XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
-        PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-        TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
-        DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
-        XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
-        DEBUG: ${{ inputs.build-with-debug == 'true' && '1' || '0' }}
-        OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-        HUGGING_FACE_HUB_TOKEN: ${{ inputs.HUGGING_FACE_HUB_TOKEN }}
-      shell: bash
-      run: |
-        # detached container should get cleaned up by teardown_ec2_linux
-        container_name=$(docker run \
-          -e BUILD_ENVIRONMENT \
-          -e MAX_JOBS="$(nproc --ignore=2)" \
-          -e AWS_DEFAULT_REGION \
-          -e PR_NUMBER \
-          -e SHA1 \
-          -e BRANCH \
-          -e SCCACHE_BUCKET \
-          -e SCCACHE_S3_KEY_PREFIX \
-          -e XLA_CUDA \
-          -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-          -e SKIP_SCCACHE_INITIALIZATION=1 \
-          -e TORCH_CUDA_ARCH_LIST \
-          -e PR_LABELS \
-          -e OUR_GITHUB_JOB_ID \
-          -e HUGGING_FACE_HUB_TOKEN \
-          --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-          --security-opt seccomp=unconfined \
-          --cap-add=SYS_PTRACE \
-          --tty \
-          --detach \
-          --user jenkins \
-          -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-          -w /var/lib/jenkins/workspace \
-          "${DOCKER_IMAGE}"
-        )
-        docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
-
-    - name: Archive artifacts into zip
-      if: inputs.build-generates-artifacts == 'true' && steps.build.outcome != 'skipped'
-      shell: bash
-      run: |
-        zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .additional_ci_files
-
-    - name: Store PyTorch Build Artifacts on S3
-      uses: seemethere/upload-artifact-s3@v5
-      if: inputs.build-generates-artifacts == 'true' && steps.build.outcome != 'skipped'
-      with:
-        name: ${{ inputs.build-environment }}
-        retention-days: 14
-        if-no-files-found: error
-        path: artifacts.zip
-        s3-bucket: ${{ inputs.s3-bucket }}
-
-    - name: Upload sccache stats
-      if: steps.build.outcome != 'skipped'
-      uses: seemethere/upload-artifact-s3@v5
-      with:
-        s3-prefix: |
-          ${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
-        retention-days: 365
-        if-no-files-found: warn
-        path: sccache-stats-*.json
-        s3-bucket: ${{ inputs.s3-bucket }}
-
-    - name: Teardown Linux
-      uses: pytorch/test-infra/.github/actions/teardown-linux@main
-      if: always()
--- a/.github/actions/linux-test/action.yml
+++ b/.github/actions/linux-test/action.yml
@ -1,384 +0,0 @@
-name: linux-test
-
-inputs:
-  build-environment:
-    required: true
-    type: string
-    description: Top-level label for what's being built/tested.
-  test-matrix:
-    required: true
-    type: string
-    description: JSON description of what test configs to run.
-  docker-image:
-    required: true
-    type: string
-    description: Docker image to run in.
-  sync-tag:
-    required: false
-    type: string
-    default: ""
-    description: |
-      If this is set, our linter will use this to make sure that every other
-      job with the same `sync-tag` is identical.
-  use-gha:
-    required: false
-    type: string
-    default: ""
-    description: If set to any value, upload to GHA. Otherwise upload to S3.
-  dashboard-tag:
-    required: false
-    type: string
-    default: ""
-  s3-bucket:
-    description: S3 bucket to download artifact
-    required: false
-    type: string
-    default: "gha-artifacts"
-  aws-role-to-assume:
-    description: role to assume for downloading artifacts
-    required: false
-    type: string
-    default: ""
-  HUGGING_FACE_HUB_TOKEN:
-    description: |
-      HF Auth token to avoid rate limits when downloading models or datasets from hub
-    required: false
-    default: ""
-  GITHUB_TOKEN:
-    description: GitHub token
-    required: true
-
-#env:
-#  GIT_DEFAULT_BRANCH: ${{ inputs.default_branch }}
-
-runs:
-  using: composite
-  steps:
-    - name: Setup Linux
-      uses: ./.github/actions/setup-linux
-
-    - name: configure aws credentials
-      if : ${{ inputs.aws-role-to-assume != '' }}
-      uses: aws-actions/configure-aws-credentials@v3
-      with:
-        role-to-assume: ${{ inputs.aws-role-to-assume }}
-        role-session-name: gha-linux-test
-        aws-region: us-east-1
-
-    - name: Calculate docker image
-      id: calculate-docker-image
-      uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-      with:
-        docker-image-name: ${{ inputs.docker-image }}
-
-    - name: Use following to pull public copy of the image
-      id: print-ghcr-mirror
-      env:
-        ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
-      shell: bash
-      run: |
-        tag=${ECR_DOCKER_IMAGE##*/}
-        echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
-
-    - name: Pull docker image
-      uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-      with:
-        docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
-
-    - name: Check if in a ARC runner
-      shell: bash
-      id: check_arc_runner
-      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
-
-    - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-      id: install-nvidia-driver
-      uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-      if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
-
-    - name: Lock NVIDIA A100 40GB Frequency
-      shell: bash
-      run: |
-        sudo nvidia-smi -pm 1
-        sudo nvidia-smi -ac 1215,1410
-        nvidia-smi
-      if: contains(matrix.runner, 'a100')
-
-    - name: Start monitoring script
-      id: monitor-script
-      shell: bash
-      continue-on-error: true
-      run: |
-        python3 -m pip install psutil==5.9.1 nvidia-ml-py==11.525.84
-        python3 -m tools.stats.monitor > usage_log.txt 2>&1 &
-        echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
-
-    - name: Download build artifacts
-      uses: ./.github/actions/download-build-artifacts
-      with:
-        name: ${{ inputs.build-environment }}
-        s3-bucket: ${{ inputs.s3-bucket }}
-
-    - name: Download TD artifacts
-      continue-on-error: true
-      uses: ./.github/actions/download-td-artifacts
-
-    - name: Parse ref
-      id: parse-ref
-      shell: bash
-      run: .github/scripts/parse_ref.py
-
-    - name: Get workflow job id
-      id: get-job-id
-      uses: ./.github/actions/get-workflow-job-id
-      if: always()
-      with:
-        github-token: ${{ inputs.GITHUB_TOKEN }}
-
-    - name: Check for keep-going label and re-enabled test issues
-      # This uses the filter-test-configs action because it conviniently
-      # checks for labels and re-enabled test issues.  It does not actually do
-      # any filtering.  All filtering is done in the build step.
-      id: keep-going
-      uses: ./.github/actions/filter-test-configs
-      with:
-        github-token: ${{ inputs.GITHUB_TOKEN }}
-        test-matrix: ${{ inputs.test-matrix }}
-        job-name: ${{ steps.get-job-id.outputs.job-name }}
-
-    - name: Test
-      id: test
-      env:
-        BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
-        PR_NUMBER: ${{ github.event.pull_request.number }}
-        GITHUB_REPOSITORY: ${{ github.repository }}
-        GITHUB_WORKFLOW: ${{ github.workflow }}
-        GITHUB_JOB: ${{ github.job }}
-        GITHUB_RUN_ID: ${{ github.run_id }}
-        GITHUB_RUN_NUMBER: ${{ github.run_number }}
-        GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
-        JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-        JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
-        BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-        BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}
-        TEST_CONFIG: ${{ matrix.config }}
-        SHARD_NUMBER: ${{ matrix.shard }}
-        NUM_TEST_SHARDS: ${{ matrix.num_shards }}
-        REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
-        CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
-        VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
-        NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
-        NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
-        TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
-        SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
-        SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
-        SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
-        DOCKER_IMAGE: ${{ inputs.docker-image }}
-        XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
-        XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
-        PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
-        PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
-        DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
-        HUGGING_FACE_HUB_TOKEN: ${{ inputs.HUGGING_FACE_HUB_TOKEN }}
-      shell: bash
-      run: |
-        set -x
-
-        if [[ $TEST_CONFIG == 'multigpu' ]]; then
-          TEST_COMMAND=.ci/pytorch/multigpu-test.sh
-        elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
-          TEST_COMMAND=.ci/onnx/test.sh
-        else
-          TEST_COMMAND=.ci/pytorch/test.sh
-        fi
-
-        # detached container should get cleaned up by teardown_ec2_linux
-        # TODO: Stop building test binaries as part of the build phase
-        # Used for GPU_FLAG since that doesn't play nice
-        # shellcheck disable=SC2086,SC2090
-        container_name=$(docker run \
-          ${GPU_FLAG:-} \
-          -e BUILD_ENVIRONMENT \
-          -e PR_NUMBER \
-          -e GITHUB_ACTIONS \
-          -e GITHUB_REPOSITORY \
-          -e GITHUB_WORKFLOW \
-          -e GITHUB_JOB \
-          -e GITHUB_RUN_ID \
-          -e GITHUB_RUN_NUMBER \
-          -e GITHUB_RUN_ATTEMPT \
-          -e JOB_ID \
-          -e JOB_NAME \
-          -e BASE_SHA \
-          -e BRANCH \
-          -e SHA1 \
-          -e AWS_DEFAULT_REGION \
-          -e IN_WHEEL_TEST \
-          -e SHARD_NUMBER \
-          -e TEST_CONFIG \
-          -e NUM_TEST_SHARDS \
-          -e REENABLED_ISSUES \
-          -e CONTINUE_THROUGH_ERROR \
-          -e VERBOSE_TEST_LOGS \
-          -e NO_TEST_TIMEOUT \
-          -e NO_TD \
-          -e TD_DISTRIBUTED \
-          -e PR_LABELS \
-          -e MAX_JOBS="$(nproc --ignore=2)" \
-          -e SCCACHE_BUCKET \
-          -e SCCACHE_S3_KEY_PREFIX \
-          -e XLA_CUDA \
-          -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-          -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
-          -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
-          -e SKIP_SCCACHE_INITIALIZATION=1 \
-          -e HUGGING_FACE_HUB_TOKEN \
-          -e DASHBOARD_TAG \
-          --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-          --security-opt seccomp=unconfined \
-          --cap-add=SYS_PTRACE \
-          --ipc=host \
-          --shm-size="${SHM_SIZE}" \
-          --tty \
-          --detach \
-          --name="${container_name}" \
-          --user jenkins \
-          -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-          -w /var/lib/jenkins/workspace \
-          "${DOCKER_IMAGE}"
-        )
-        # Propagate download.pytorch.org IP to container
-        grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" sudo bash -c "/bin/cat >> /etc/hosts"
-        echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
-        docker exec -t "${container_name}" sh -c "pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"
-
-    - name: Upload pytest cache if tests failed
-      uses: ./.github/actions/pytest-cache-upload
-      continue-on-error: true
-      if: failure() && steps.test.conclusion && steps.test.conclusion == 'failure'
-      with:
-        cache_dir: .pytest_cache
-        shard: ${{ matrix.shard }}
-        sha: ${{ github.event.pull_request.head.sha || github.sha }}
-        test_config: ${{ matrix.config }}
-        job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
-
-    - name: Print remaining test logs
-      shell: bash
-      if: always() && steps.test.conclusion
-      run: |
-        cat test/**/*_toprint.log || true
-
-    - name: Stop monitoring script
-      if: always() && steps.monitor-script.outputs.monitor-script-pid
-      shell: bash
-      continue-on-error: true
-      env:
-        MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
-      run: |
-        kill "$MONITOR_SCRIPT_PID"
-
-    - name: Upload test artifacts
-      uses: ./.github/actions/upload-test-artifacts
-      if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
-      with:
-        file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
-        use-gha: ${{ inputs.use-gha }}
-        s3-bucket: ${{ inputs.s3-bucket }}
-
-    - name: Collect backtraces from coredumps (if any)
-      if: always()
-      shell: bash
-      run: |
-        # shellcheck disable=SC2156
-        find . -iname "core.[1-9]*" -exec docker exec "${DOCKER_CONTAINER_ID}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \;
-
-    - name: Store Core dumps on S3
-      uses: seemethere/upload-artifact-s3@v5
-      if: failure()
-      with:
-        name: coredumps-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}
-        retention-days: 14
-        if-no-files-found: ignore
-        path: ./**/core.[1-9]*
-
-    - name: Teardown Linux
-      uses: pytorch/test-infra/.github/actions/teardown-linux@main
-      if: always()
-
-    # NB: We are currently having an intermittent GPU-related issue on G5 runners with
-    # A10G GPU. Once this happens, trying to reset the GPU as done in setup-nvidia does
-    # not seem to help. Here are some symptoms:
-    #   * Calling nvidia-smi timeouts after 60 second
-    #   * Fail to run nvidia-smi with an unable to determine the device handle for GPU
-    #     unknown error
-    #   * Test fails with a missing CUDA GPU error when initializing CUDA in PyTorch
-    #   * Run docker --gpus all fails with error response from daemon
-    #
-    # As both the root cause and recovery path are unclear, let's take the runner out of
-    # service so that it doesn't get any more jobs
-    - name: Check NVIDIA driver installation step
-      if: failure() && steps.install-nvidia-driver.outcome && steps.install-nvidia-driver.outcome != 'skipped'
-      shell: bash
-      env:
-        RUNNER_WORKSPACE: ${{ runner.workspace }}
-      run: |
-        set +e
-        set -x
-
-        nvidia-smi
-        # NB: Surprisingly, nvidia-smi command returns successfully with return code 0 even in
-        # the case where the driver has already crashed as it still can get the driver version
-        # and some basic information like the bus ID.  However, the rest of the information
-        # would be missing (ERR!), for example:
-        #
-        # +-----------------------------------------------------------------------------+
-        # | NVIDIA-SMI 525.89.02    Driver Version: 525.89.02    CUDA Version: 12.0     |
-        # |-------------------------------+----------------------+----------------------+
-        # | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
-        # | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
-        # |                               |                      |               MIG M. |
-        # |===============================+======================+======================|
-        # |   0  ERR!                Off  | 00000000:00:1E.0 Off |                 ERR! |
-        # |ERR!  ERR! ERR!    ERR! / ERR! |   4184MiB / 23028MiB |    ERR!      Default |
-        # |                               |                      |                 ERR! |
-        # +-------------------------------+----------------------+----------------------+
-        #
-        # +-----------------------------------------------------------------------------+
-        # | Processes:                                                                  |
-        # |  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
-        # |        ID   ID                                                   Usage      |
-        # |=============================================================================|
-        # +-----------------------------------------------------------------------------+
-        #
-        # This should be reported as a failure instead as it will guarantee to fail when
-        # Docker tries to run with --gpus all
-        #
-        # So, the correct check here is to query one of the missing piece of info like
-        # GPU name, so that the command can fail accordingly
-        nvidia-smi --query-gpu=gpu_name --format=csv,noheader --id=0
-        NVIDIA_SMI_STATUS=$?
-
-        # These are acceptable return code from nvidia-smi as copied from setup-nvidia GitHub action
-        if [ "$NVIDIA_SMI_STATUS" -ne 0 ] && [ "$NVIDIA_SMI_STATUS" -ne 14 ]; then
-          echo "NVIDIA driver installation has failed, shutting down the runner..."
-          .github/scripts/stop_runner_service.sh
-        fi
-
-        # For runner with multiple GPUs, we also want to confirm that the number of GPUs are the
-        # power of 2, i.e. 1, 2, 4, or 8. This is to avoid flaky test issue when one GPU fails
-        # https://github.com/pytorch/test-infra/issues/4000
-        GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
-        NVIDIA_SMI_STATUS=$?
-
-        # These are acceptable return code from nvidia-smi as copied from setup-nvidia GitHub action
-        if [ "$NVIDIA_SMI_STATUS" -ne 0 ] && [ "$NVIDIA_SMI_STATUS" -ne 14 ]; then
-          echo "NVIDIA driver installation has failed, shutting down the runner..."
-          .github/scripts/stop_runner_service.sh
-        fi
-
-        # Check the GPU count to be a power of 2
-        if [ "$GPU_COUNT" -le 8 ] && [ "$GPU_COUNT" -ne 1 ] && [ "$GPU_COUNT" -ne 2 ] && [ "$GPU_COUNT" -ne 4 ] && [ "$GPU_COUNT" -ne 8 ]; then
-          echo "NVIDIA driver detects $GPU_COUNT GPUs. The runner has a broken GPU, shutting it down..."
-          .github/scripts/stop_runner_service.sh
-        fi
--- a/.github/actions/pytest-cache-download/action.yml
+++ b/.github/actions/pytest-cache-download/action.yml
@ -9,10 +9,6 @@ inputs:
  job_identifier:
    description: Text that uniquely identifies a given job type within a workflow. All shards of a job should share the same job identifier.
    required: true
-  s3_bucket:
-    description: S3 bucket to download PyTest cache
-    required: false
-    default: "gha-artifacts"

 runs:
  using: composite
@ -34,7 +30,6 @@ runs:
        CACHE_DIR: ${{ inputs.cache_dir }}
        JOB_IDENTIFIER: ${{ inputs.job_identifier }}
        REPO: ${{ github.repository }}
-        BUCKET: ${{ inputs.s3_bucket }}
      run: |
        python3 .github/scripts/pytest_cache.py \
          --download \
@ -43,4 +38,3 @@ runs:
          --job_identifier $JOB_IDENTIFIER \
          --temp_dir $RUNNER_TEMP \
          --repo $REPO \
-          --bucket $BUCKET \
--- a/.github/actions/setup-linux/action.yml
+++ b/.github/actions/setup-linux/action.yml
@ -15,12 +15,10 @@ runs:
          category=$1
          # If it is GCP runner (runner name contains gcp), do not run this
          runner_name_str=${{ runner.name }}
-          if [[ -f /.inarc ]]; then
-            echo "ARC Runner, no info on ec2 metadata"
-          elif [[ $runner_name_str == *"gcp"* ]]; then
-            echo "Runner is from Google Cloud Platform, No info on ec2 metadata"
-          else
+          if [[ $runner_name_str != *"gcp"* ]]; then
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          else
+            echo "Runner is from Google Cloud Platform, No info on ec2 metadata"
          fi
        }
        echo "ami-id: $(get_ec2_metadata ami-id)"
@ -28,14 +26,8 @@ runs:
        echo "instance-type: $(get_ec2_metadata instance-type)"
        echo "system info $(uname -a)"

-    - name: Check if in a ARC runner
-      shell: bash
-      id: check_arc_runner
-      run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)"  >> $GITHUB_OUTPUT
-
    - name: Start docker if docker deamon is not running
      shell: bash
-      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
      run: |
        if systemctl is-active --quiet docker; then
            echo "Docker daemon is running...";
@ -66,7 +58,6 @@ runs:
        env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"

    - name: Kill any existing containers, clean up images
-      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
      shell: bash
      run: |
        # ignore expansion of "docker ps -q" since it could be empty
@ -105,28 +96,3 @@ runs:

        echo "${RESOLVED_IP} ${PT_DOMAIN}" | sudo tee -a /etc/hosts
        cat /etc/hosts
-
-    - name: Check that the docker daemon is running
-      shell: bash
-      continue-on-error: true
-      if: ${{ steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'true' }}
-      run: |
-        set +x
-
-        max_attempts=30
-        delay=10
-        attempt=1
-
-        for attempt in $(seq 1 $max_attempts); do
-          echo "Attempt $attempt of $max_attempts: Checking if Docker daemon is running..."
-          if docker info > /dev/null 2>&1; then
-            echo "Docker is running. Proceeding with the next steps"
-            exit 0
-          else
-            echo "Docker is not running yet."
-            echo "Retrying in $delay seconds..."
-            sleep $delay
-          fi
-        done
-        echo "Reached maximum attempts to connect to Docker. Exiting."
-        exit 1
--- a/.github/actions/test-pytorch-binary/action.yml
+++ b/.github/actions/test-pytorch-binary/action.yml
@ -35,7 +35,7 @@ runs:
          "${DOCKER_IMAGE}"
        )

-        if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" && "${BUILD_ENVIRONMENT}" != "linux-s390x-binary-manywheel" ]]; then
+        if [[ "${GPU_ARCH_TYPE}" != "rocm" && "${BUILD_ENVIRONMENT}" != "linux-aarch64-binary-manywheel" ]]; then
          # Propagate download.pytorch.org IP to container. This is only needed on Linux non aarch64 runner
          grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" bash -c "/bin/cat >> /etc/hosts"
        fi
@ -44,12 +44,3 @@ runs:
        # Generate test script
        docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
        docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
-
-    - name: Cleanup docker
-      if: always() && env.BUILD_ENVIRONMENT == 'linux-s390x-binary-manywheel'
-      shell: bash
-      run: |
-        # on s390x stop the container for clean worker stop
-        # ignore expansion of "docker ps -q" since it could be empty
-        # shellcheck disable=SC2046
-        docker stop $(docker ps -q) || true
--- a/.github/actions/upload-test-artifacts/action.yml
+++ b/.github/actions/upload-test-artifacts/action.yml
@ -11,10 +11,6 @@ inputs:
      Suffix to add to the filename of the artifacts. This should include the
      workflow job id, see [Job id in artifacts].
    required: true
-  s3-bucket:
-    description: S3 bucket to download builds
-    required: false
-    default: "gha-artifacts"

 runs:
  using: composite
@ -46,7 +42,7 @@ runs:
      env:
        FILE_SUFFIX: ${{ inputs.file-suffix }}
      run: |
-        # Remove any previous usage logs if they exist
+        # Remove any previous test reports if they exist
        rm -f logs-*.zip
        # this workflow is also run in bazel build test, but we dont generate usage reports for it
        # so check to see if the file exists first
@ -57,18 +53,6 @@ runs:
            zip -r "logs-${FILE_SUFFIX}.zip" test -i '*.log'
        fi

-    - name: Zip debugging artifacts for upload
-      if: runner.os != 'Windows' && !inputs.use-gha
-      shell: bash
-      env:
-        FILE_SUFFIX: ${{ inputs.file-suffix }}
-      run: |
-        # Remove any previous debugging artifacts if they exist
-        rm -f debug-*.zip
-        if [ -d 'test/debug' ]; then
-          zip -r "debug-${FILE_SUFFIX}.zip" test/debug
-        fi
-
    # Windows zip
    - name: Zip JSONs for upload
      if: runner.os == 'Windows' && !inputs.use-gha
@ -103,7 +87,6 @@ runs:
      uses: seemethere/upload-artifact-s3@v5
      if: ${{ !inputs.use-gha }}
      with:
-        s3-bucket: ${{ inputs.s3-bucket }}
        s3-prefix: |
          ${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
        retention-days: 14
@ -114,7 +97,6 @@ runs:
      uses: seemethere/upload-artifact-s3@v5
      if: ${{ !inputs.use-gha }}
      with:
-        s3-bucket: ${{ inputs.s3-bucket }}
        s3-prefix: |
          ${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
        retention-days: 14
@ -126,25 +108,12 @@ runs:
      if: ${{ !inputs.use-gha }}
      continue-on-error: true
      with:
-        s3-bucket: ${{ inputs.s3-bucket }}
        s3-prefix: |
          ${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
        retention-days: 14
        if-no-files-found: ignore
        path: logs-*.zip

-    - name: Store Debug Artifacts on S3
-      uses: seemethere/upload-artifact-s3@v5
-      if: ${{ !inputs.use-gha }}
-      continue-on-error: true
-      with:
-        s3-bucket: ${{ inputs.s3-bucket }}
-        s3-prefix: |
-          ${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
-        retention-days: 14
-        if-no-files-found: ignore
-        path: debug-*.zip
-
    # GHA upload
    - name: Store Test Downloaded JSONs on Github
      uses: actions/upload-artifact@v3
--- a/.github/ci_commit_pins/audio.txt
+++ b/.github/ci_commit_pins/audio.txt
@ -1 +1 @@
-1980f8af5bcd0bb2ce51965cf79d8d4c25dad8a0
+87aeb554d3e2f7855b7abe5120c282f59648ed7a
--- a/.github/ci_commit_pins/vision.txt
+++ b/.github/ci_commit_pins/vision.txt
@ -1 +1 @@
-d23a6e1664d20707c11781299611436e1f0c104f
+2c127da8b5e2e8f44b50994c6cb931bcca267cfe
--- a/.github/ci_commit_pins/xla.txt
+++ b/.github/ci_commit_pins/xla.txt
@ -1 +1 @@
-6f0b61e5d782913a0fc7743812f2a8e522189111
+r2.3
--- a/.github/label_to_label.yml
+++ b/.github/label_to_label.yml
@ -1,13 +0,0 @@
-# Use this to auto apply labels based on other labels.  Applies to both PRs and
-# issues. Currently only supports any and all
- any:
-  - "module: custom operators"
-  - "module: aotdispatch"
-  then:
-  - "module: pt2-dispatcher"
- any:
-  - "module: dynamo"
-  - "module: pt2-dispatcher"
-  - "module: inductor"
-  then:
-  - "oncall: pt2"
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@ -35,9 +35,6 @@
 - test/distributed/tensor/parallel/test_fsdp_2d_parallel.py
 - torch/distributed/_tensor/**
 - torch/distributed/fsdp/**
- torch/csrc/inductor/**
- test/cpp/aoti_abi_check/**
- test/cpp/aoti_inference/**

 "module: cpu":
 - aten/src/ATen/cpu/**
@ -58,17 +55,6 @@
 - third_party/mkl-dnn.BUILD
 - torch/csrc/jit/codegen/onednn/**
 - test/test_jit_llga_fuser.py
- test/test_mkldnn.py
-
-"ciflow/linux-aarch64":
- third_party/ideep
- caffe2/ideep/**
- caffe2/python/ideep/**
- cmake/Modules/FindMKLDNN.cmake
- third_party/mkl-dnn.BUILD
- torch/csrc/jit/codegen/onednn/**
- test/test_jit_llga_fuser.py
- test/test_mkldnn.py

 "module: amp (automated mixed precision)":
 - torch/amp/**
--- a/.github/merge_rules.yaml
+++ b/.github/merge_rules.yaml
@ -28,13 +28,12 @@
  - caffe2/python/onnx/**
  approved_by:
  - BowenBao
+  - abock
  - justinchuby
-  - liqunfu
  - shubhambhokare1
  - thiagocrepaldi
  - titaiwangms
  - wschin
-  - xadupre
  mandatory_checks_name:
  - EasyCLA
  - Lint
@ -237,23 +236,6 @@
  - Lint
  - pull

- name: XPU ATen
-  patterns:
-  - aten/src/ATen/xpu/**
-  - c10/xpu/**
-  - torch/csrc/xpu/**
-  - torch/xpu/**
-  - test/xpu/**
-  - third_party/xpu.txt
-  approved_by:
-  - EikanWang
-  - jgong5
-  - gujinghui
-  mandatory_checks_name:
-  - EasyCLA
-  - Lint
-  - pull
-
 - name: Distributions
  patterns:
  - torch/distributions/**
@ -375,14 +357,12 @@

 - name: CPU inductor
  patterns:
-  - torch/_inductor/mkldnn_lowerings.py
  - torch/_inductor/fx_passes/mkldnn_fusion.py
  - torch/_inductor/fx_passes/quantization.py
  - torch/_inductor/codegen/cpp.py
  - test/inductor/test_mkldnn_pattern_matcher.py
  - test/inductor/test_cpu_repo.py
  - test/inductor/test_cpu_cpp_wrapper.py
-  - aten/src/ATen/cpu/**
  - aten/src/ATen/native/quantized/cpu/**
  - test/quantization/core/test_quantized_op.py
  - torch/ao/quantization/quantizer/x86_inductor_quantizer.py
--- a/.github/pytorch-probot.yml
+++ b/.github/pytorch-probot.yml
@ -7,8 +7,6 @@ ciflow_push_tags:
 - ciflow/binaries_wheel
 - ciflow/inductor
 - ciflow/inductor-perf-compare
- ciflow/inductor-micro-benchmark
- ciflow/linux-aarch64
 - ciflow/mps
 - ciflow/nightly
 - ciflow/periodic
@ -17,12 +15,9 @@ ciflow_push_tags:
 - ciflow/trunk
 - ciflow/unstable
 - ciflow/xpu
- ciflow/torchbench
 retryable_workflows:
 - lint
 - pull
 - trunk
 - linux-binary
 - windows-binary
-labeler_config: labeler.yml
-label_to_label_config: label_to_label.yml
--- a/.github/requirements-gha-cache.txt
+++ b/.github/requirements-gha-cache.txt
@ -5,7 +5,7 @@
 #   functorch/docs/requirements.txt
 #   .ci/docker/requirements-ci.txt
 boto3==1.19.12
-jinja2==3.1.4
+jinja2==3.0.1
 lintrunner==0.10.7
 ninja==1.10.0.post1
 nvidia-ml-py==11.525.84
--- a/.github/requirements/pip-requirements-iOS.txt
+++ b/.github/requirements/pip-requirements-iOS.txt
@ -1,4 +1,4 @@
 # iOS simulator requirements
 coremltools==5.0b5
 protobuf==3.20.2
-optree==0.11.0
+optree==0.9.1
--- a/.github/requirements/pip-requirements-macOS.txt
+++ b/.github/requirements/pip-requirements-macOS.txt
@ -26,7 +26,4 @@ pytest-cpp==2.3.0
 rockset==1.0.3
 z3-solver==4.12.2.0
 tensorboard==2.13.0
-optree==0.11.0
-# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
-# which the stringify metadata is wrong when escaping double quote
-protobuf==3.20.2
+optree==0.9.1
--- a/.github/scripts/amd/package_triton_wheel.sh
+++ b/.github/scripts/amd/package_triton_wheel.sh
@ -1,99 +0,0 @@
-set -ex
-
-# If ROCM_HOME isn't set, use ROCM_PATH if set, otherwise fall back to /opt/rocm
-ROCM_HOME="${ROCM_HOME:-${ROCM_PATH:-/opt/rocm}}"
-
-# Find the rocm_version.h header file to extract the ROCm version from
-rocm_version_h="${ROCM_HOME}/include/rocm-core/rocm_version.h"
-if [ ! -f "$rocm_version_h" ]; then
-    rocm_version_h="${ROCM_HOME}/include/rocm_version.h"
-fi
-
-# Error out if rocm_version.h not found
-if [ ! -f "$rocm_version_h" ]; then
-    echo "Error: rocm_version.h not found in expected locations." >&2
-    exit 1
-fi
-
-# Extract major, minor and patch ROCm version numbers
-MAJOR_VERSION=$(grep 'ROCM_VERSION_MAJOR' "$rocm_version_h" | awk '{print $3}')
-MINOR_VERSION=$(grep 'ROCM_VERSION_MINOR' "$rocm_version_h" | awk '{print $3}')
-PATCH_VERSION=$(grep 'ROCM_VERSION_PATCH' "$rocm_version_h" | awk '{print $3}')
-ROCM_INT=$(($MAJOR_VERSION * 10000 + $MINOR_VERSION * 100 + $PATCH_VERSION))
-echo "ROCm version: $ROCM_INT"
-
-# Default TRITON_ROCM_DIR to third_party/amd/backend if it is not set
-if [[ -z "${TRITON_ROCM_DIR}" ]]; then
-    export TRITON_ROCM_DIR=third_party/amd/backend
-fi
-
-# Remove packaged libs and headers
-rm -rf $TRITON_ROCM_DIR/include/*
-
-LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
-LIBNUMA_PATH="/usr/lib64/libnuma.so.1"
-LIBELF_PATH="/usr/lib64/libelf.so.1"
-
-OS_SO_PATHS=(
-    $LIBELF_PATH
-    $LIBNUMA_PATH
-    $LIBTINFO_PATH
-)
-
-for lib in "${OS_SO_PATHS[@]}"
-do
-    cp $lib $TRITON_ROCM_DIR/lib/
-done
-
-# Required ROCm libraries
-if [[ "${MAJOR_VERSION}" == "6" ]]; then
-    libamdhip="libamdhip64.so.6"
-else
-    libamdhip="libamdhip64.so.5"
-fi
-
-# Required ROCm libraries - ROCm 6.0
-ROCM_SO=(
-    "${libamdhip}"
-    "libhsa-runtime64.so.1"
-    "libamd_comgr.so.2"
-    "libdrm.so.2"
-    "libdrm_amdgpu.so.1"
-)
-
-if [[ $ROCM_INT -ge 60100 ]]; then
-    ROCM_SO+=("librocprofiler-register.so.0")
-fi
-
-for lib in "${ROCM_SO[@]}"
-do
-    file_path=($(find $ROCM_HOME/lib/ -name "$lib")) # First search in lib
-    if [[ -z $file_path ]]; then
-        if [ -d "$ROCM_HOME/lib64/" ]; then
-            file_path=($(find $ROCM_HOME/lib64/ -name "$lib")) # Then search in lib64
-        fi
-    fi
-    if [[ -z $file_path ]]; then
-        file_path=($(find $ROCM_HOME/ -name "$lib")) # Then search in ROCM_HOME
-    fi
-    if [[ -z $file_path ]]; then
-        file_path=($(find /opt/ -name "$lib")) # Then search in /opt
-    fi
-    if [[ -z $file_path ]]; then
-            echo "Error: Library file $lib is not found." >&2
-            exit 1
-    fi
-
-    cp $file_path $TRITON_ROCM_DIR/lib
-    # When running locally, and not building a wheel, we need to satisfy shared objects requests that don't look for versions
-    LINKNAME=$(echo $lib | sed -e 's/\.so.*/.so/g')
-    ln -sf $lib $TRITON_ROCM_DIR/lib/$LINKNAME
-
-done
-
-# Copy Include Files
-cp -r $ROCM_HOME/include/hip $TRITON_ROCM_DIR/include
-
-# Copy linker
-mkdir -p $TRITON_ROCM_DIR/llvm/bin
-cp $ROCM_HOME/llvm/bin/ld.lld $TRITON_ROCM_DIR/llvm/bin/
--- a/.github/scripts/amd/patch_triton_wheel.sh
+++ b/.github/scripts/amd/patch_triton_wheel.sh
@ -1,103 +0,0 @@
-#!/bin/bash
-set -x
-
-if [ -z "$1" ]; then
-    echo "Need wheel location argument" && exit 1
-fi
-
-WHEELHOUSE_DIR=$1
-PATCHELF_BIN=patchelf
-ROCM_LIB=backends/amd/lib
-ROCM_LD=backends/amd/llvm/bin
-PREFIX=triton
-fname_without_so_number() {
-    LINKNAME=$(echo $1 | sed -e 's/\.so.*/.so/g')
-    echo "$LINKNAME"
-}
-
-replace_needed_sofiles() {
-    find $1 -name '*.so*' -o -name 'ld.lld' | while read sofile; do
-        origname=$2
-        patchedname=$3
-        if [[ "$origname" != "$patchedname" ]]; then
-            set +e
-            origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
-            ERRCODE=$?
-            set -e
-            if [ "$ERRCODE" -eq "0" ]; then
-                echo "patching $sofile entry $origname to $patchedname"
-                $PATCHELF_BIN --replace-needed $origname $patchedname $sofile
-            fi
-        fi
-    done
-}
-
-mkdir  -p "/tmp_dir"
-pushd /tmp_dir
-for pkg in /$WHEELHOUSE_DIR/*triton*.whl; do
-    echo "Modifying $pkg"
-    rm -rf tmp
-    mkdir -p tmp
-    cd tmp
-    cp $pkg .
-    unzip -q $(basename $pkg)
-    rm -f $(basename $pkg)
-    $PATCHELF_BIN --set-rpath ${LD_SO_RPATH:-'$ORIGIN:$ORIGIN/../../lib'} $PREFIX/$ROCM_LD/ld.lld
-    $PATCHELF_BIN --print-rpath $PREFIX/$ROCM_LD/ld.lld
-    # Modify libtriton.so as it sits in _C directory apart from its dependencies
-    find $PREFIX/_C -type f -name "*.so*" | while read sofile; do
-        echo "Setting rpath of $sofile"
-        $PATCHELF_BIN --set-rpath ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/'../$ROCM_LIB} ${FORCE_RPATH:-} $sofile
-        $PATCHELF_BIN --print-rpath $sofile
-    done
-
-    # All included dependencies are included in a single lib directory
-    deps=()
-    deps_soname=()
-    while read sofile; do
-        echo "Setting rpath of $sofile to ${LIB_SO_RPATH:-'$ORIGIN'}"
-        $PATCHELF_BIN --set-rpath ${LIB_SO_RPATH:-'$ORIGIN'} ${FORCE_RPATH:-} $sofile
-        $PATCHELF_BIN --print-rpath $sofile
-        deps+=("$sofile")
-        deps_soname+=("$(basename $sofile)")
-    done < <(find $PREFIX/$ROCM_LIB -type f -name "*.so*")
-
-    patched=()
-    for filepath in "${deps[@]}"; do
-        filename=$(basename $filepath)
-        destpath=$PREFIX/$ROCM_LIB/$filename
-        if [[ "$filepath" != "$destpath" ]]; then
-            cp $filepath $destpath
-        fi
-        patchedpath=$(fname_without_so_number $destpath)
-        patchedname=$(basename $patchedpath)
-        if [[ "$destpath" != "$patchedpath" ]]; then
-            mv $destpath $patchedpath
-        fi
-        patched+=("$patchedname")
-        echo "Copied $filepath to $patchedpath"
-    done
-
-    # Go through all required shared objects and see if any of our other objects are dependents.  If so, replace so.ver with so
-    for ((i=0;i<${#deps[@]};++i)); do
-        echo "replacing "${deps_soname[i]} ${patched[i]}
-        replace_needed_sofiles $PREFIX/$ROCM_LIB ${deps_soname[i]} ${patched[i]}
-        replace_needed_sofiles $PREFIX/_C ${deps_soname[i]} ${patched[i]}
-        replace_needed_sofiles $PREFIX/$ROCM_LD ${deps_soname[i]} ${patched[i]}
-    done
-
-    # Re-bundle whl with so adjustments
-    zip -rqy $(basename $pkg) *
-
-    if [[ -z "${MANYLINUX_VERSION}" ]]; then
-        newpkg=$pkg
-    else
-        newpkg=$(echo $pkg | sed -e "s/\linux_x86_64/${MANYLINUX_VERSION}/g")
-    fi
-
-    # Remove original whl
-    rm -f $pkg
-
-    # Move rebuilt whl to original location with new name.
-    mv $(basename $pkg) $newpkg
-done
--- a/.github/scripts/build_triton_wheel.py
+++ b/.github/scripts/build_triton_wheel.py
@ -10,6 +10,9 @@ from typing import Optional
 SCRIPT_DIR = Path(__file__).parent
 REPO_DIR = SCRIPT_DIR.parent.parent

+# TODO: Remove me once Triton version is again in sync for vanilla and ROCm
+ROCM_TRITION_VERSION = "2.2.0"
+

 def read_triton_pin(rocm_hash: bool = False) -> str:
    triton_file = "triton.txt" if not rocm_hash else "triton-rocm.txt"
@ -29,6 +32,27 @@ def check_and_replace(inp: str, src: str, dst: str) -> str:
    return inp.replace(src, dst)


+def patch_setup_py(
+    path: Path,
+    *,
+    version: str,
+    name: str = "triton",
+    expected_version: Optional[str] = None,
+) -> None:
+    with open(path) as f:
+        orig = f.read()
+    # Replace name
+    orig = check_and_replace(orig, 'name="triton",', f'name="{name}",')
+    # Replace version
+    if not expected_version:
+        expected_version = read_triton_version()
+    orig = check_and_replace(
+        orig, f'version="{expected_version}",', f'version="{version}",'
+    )
+    with open(path, "w") as f:
+        f.write(orig)
+
+
 def patch_init_py(
    path: Path, *, version: str, expected_version: Optional[str] = None
 ) -> None:
@ -68,17 +92,23 @@ def build_triton(
    with TemporaryDirectory() as tmpdir:
        triton_basedir = Path(tmpdir) / "triton"
        triton_pythondir = triton_basedir / "python"
-        triton_repo = "https://github.com/openai/triton"
        if build_rocm:
+            triton_repo = "https://github.com/ROCmSoftwarePlatform/triton"
            triton_pkg_name = "pytorch-triton-rocm"
        else:
+            triton_repo = "https://github.com/openai/triton"
            triton_pkg_name = "pytorch-triton"
        check_call(["git", "clone", triton_repo], cwd=tmpdir)
        if release:
            ver, rev, patch = version.split(".")
-            check_call(
-                ["git", "checkout", f"release/{ver}.{rev}.x"], cwd=triton_basedir
-            )
+            if build_rocm:
+                check_call(
+                    ["git", "checkout", f"release/2.2.x"], cwd=triton_basedir
+                )
+            else:
+                check_call(
+                    ["git", "checkout", f"release/{ver}.{rev}.x"], cwd=triton_basedir
+                )
        else:
            check_call(["git", "checkout", commit_hash], cwd=triton_basedir)

@ -91,7 +121,7 @@ def build_triton(
                print("source:\n  path: .\n", file=meta)
                print(
                    "build:\n  string: py{{py}}\n  number: 1\n  script: cd python; "
-                    "python setup.py install --record=record.txt\n",
+                    "python setup.py install --single-version-externally-managed --record=record.txt\n",
                    " script_env:\n   - MAX_JOBS\n",
                    file=meta,
                )
@ -137,15 +167,18 @@ def build_triton(
        patch_init_py(
            triton_pythondir / "triton" / "__init__.py",
            version=f"{version}",
-            expected_version=None,
+            expected_version=ROCM_TRITION_VERSION if build_rocm else None,
        )

        if build_rocm:
-            check_call(
-                [f"{SCRIPT_DIR}/amd/package_triton_wheel.sh"],
-                cwd=triton_basedir,
-                shell=True,
+            # TODO: Remove me when ROCM triton is updated
+            patch_setup_py(
+                triton_pythondir / "setup.py",
+                name=triton_pkg_name,
+                version=f"{version}",
+                expected_version=ROCM_TRITION_VERSION,
            )
+            check_call("scripts/amd/setup_rocm_libs.sh", cwd=triton_basedir, shell=True)
            print("ROCm libraries setup for triton installation...")

        check_call(
@ -156,10 +189,7 @@ def build_triton(
        shutil.copy(whl_path, Path.cwd())

        if build_rocm:
-            check_call(
-                [f"{SCRIPT_DIR}/amd/patch_triton_wheel.sh", Path.cwd()],
-                cwd=triton_basedir,
-            )
+            check_call("scripts/amd/fix_so.sh", cwd=triton_basedir, shell=True)

        return Path.cwd() / whl_path.name

--- a/.github/scripts/cherry_pick.py
+++ b/.github/scripts/cherry_pick.py
@ -29,7 +29,7 @@ def parse_args() -> Any:
        "--onto-branch", type=str, required=True, help="the target release branch"
    )
    parser.add_argument(
-        "--github-actor", type=str, required=True, help="all the world's a stage"
+        "--github-actor", type=str, required=True, help="all the world’s a stage"
    )
    parser.add_argument(
        "--classification",
--- a/.github/scripts/comment_on_pr.py
+++ b/.github/scripts/comment_on_pr.py
@ -23,10 +23,8 @@ def main() -> None:

    job_link = f"[job]({run_url})" if run_url is not None else "job"
    msg = (
-        f"The {args.action} {job_link} was canceled or timed out. This most often happen if two merge requests were issued"
-        + " for the same PR, or if merge job was waiting for more than 6 hours for tests to finish."
-        + " In later case, please do not hesitate to reissue the merge command\n"
-        + f" For more information see [pytorch-bot wiki]({BOT_COMMANDS_WIKI})."
+        f"The {args.action} {job_link} was canceled. If you believe this is a mistake,"
+        + f" then you can re trigger it through [pytorch-bot]({BOT_COMMANDS_WIKI})."
    )

    gh_post_pr_comment(org, project, args.pr_num, msg)
--- a/.github/scripts/delete_old_branches.py
+++ b/.github/scripts/delete_old_branches.py
@ -18,7 +18,7 @@ ESTIMATED_TOKENS = [0]

 TOKEN = os.environ["GITHUB_TOKEN"]
 if not TOKEN:
-    raise Exception("GITHUB_TOKEN is not set")  # noqa: TRY002
+    raise Exception("GITHUB_TOKEN is not set")

 REPO_ROOT = Path(__file__).parent.parent.parent

--- a/.github/scripts/drci_mocks.json.gz
+++ b/.github/scripts/drci_mocks.json.gz
--- a/.github/scripts/filter_test_configs.py
+++ b/.github/scripts/filter_test_configs.py
@ -1,7 +1,6 @@
 #!/usr/bin/env python3

 import json
-import logging
 import os
 import re
 import subprocess
@ -9,7 +8,6 @@ import sys
 import warnings
 from enum import Enum
 from functools import lru_cache
-from logging import info
 from typing import Any, Callable, Dict, List, Optional, Set
 from urllib.request import Request, urlopen

@ -19,7 +17,33 @@ REENABLE_TEST_REGEX = "(?i)(Close(d|s)?|Resolve(d|s)?|Fix(ed|es)?) (#|https://gi

 PREFIX = "test-config/"

-logging.basicConfig(level=logging.INFO)
+# Same as shard names
+VALID_TEST_CONFIG_LABELS = {
+    f"{PREFIX}{label}"
+    for label in {
+        "backwards_compat",
+        "crossref",
+        "default",
+        "deploy",
+        "distributed",
+        "docs_tests",
+        "dynamo",
+        "force_on_cpu",
+        "functorch",
+        "inductor",
+        "inductor_distributed",
+        "inductor_huggingface",
+        "inductor_timm",
+        "inductor_torchbench",
+        "jit_legacy",
+        "multigpu",
+        "nogpu_AVX512",
+        "nogpu_NO_AVX2",
+        "slow",
+        "tsan",
+        "xla",
+    }
+}


 def is_cuda_or_rocm_job(job_name: Optional[str]) -> bool:
@ -38,9 +62,9 @@ SUPPORTED_PERIODICAL_MODES: Dict[str, Callable[[Optional[str]], bool]] = {
 }

 # The link to the published list of disabled jobs
-DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json"
+DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=qO7aEr.Og33PtLXfNq0j0yj.bbLC7SzR"
 # and unstable jobs
-UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json"
+UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=7NhgpqKTtGXVUnL1C79KboTW_5qQx8y5"

 # Some constants used to handle disabled and unstable jobs
 JOB_NAME_SEP = "/"
@ -66,12 +90,6 @@ def parse_args() -> Any:
    parser.add_argument(
        "--test-matrix", type=str, required=True, help="the original test matrix"
    )
-    parser.add_argument(
-        "--selected-test-configs",
-        type=str,
-        default="",
-        help="a comma-separated list of test configurations from the test matrix to keep",
-    )
    parser.add_argument(
        "--workflow", type=str, help="the name of the current workflow, i.e. pull"
    )
@ -137,25 +155,19 @@ def get_labels(pr_number: int) -> Set[str]:
    }


-def filter_labels(labels: Set[str], label_regex: Any) -> Set[str]:
-    """
-    Return the list of matching labels
-    """
-    return {l for l in labels if re.match(label_regex, l)}
-
-
 def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, List[Any]]:
    """
    Select the list of test config to run from the test matrix. The logic works
    as follows:

-    If the PR has one or more test-config labels as specified, only these test configs
-    will be selected.  This also works with ciflow labels, for example, if a PR has both
-    ciflow/trunk and test-config/functorch, only trunk functorch builds and tests will
-    be run.
+    If the PR has one or more labels as specified in the VALID_TEST_CONFIG_LABELS set, only
+    these test configs will be selected.  This also works with ciflow labels, for example,
+    if a PR has both ciflow/trunk and test-config/functorch, only trunk functorch builds
+    and tests will be run.

     If the PR has no test-config labels, all tests are run as usual.
    """
+
    filtered_test_matrix: Dict[str, List[Any]] = {"include": []}

    for entry in test_matrix.get("include", []):
@ -165,46 +177,23 @@ def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, Lis

        label = f"{PREFIX}{config_name.strip()}"
        if label in labels:
-            msg = f"Select {config_name} because label {label} is present in the pull request by the time the test starts"
-            info(msg)
+            print(
+                f"Select {config_name} because label {label} is presented in the pull request by the time the test starts"
+            )
            filtered_test_matrix["include"].append(entry)

-    test_config_labels = filter_labels(labels, re.compile(f"{PREFIX}.+"))
-    if not filtered_test_matrix["include"] and not test_config_labels:
-        info("Found no test-config label on the PR, so all test configs are included")
-        # Found no test-config label and the filtered test matrix is empty, return the same
+    valid_test_config_labels = labels.intersection(VALID_TEST_CONFIG_LABELS)
+
+    if not filtered_test_matrix["include"] and not valid_test_config_labels:
+        # Found no valid label and the filtered test matrix is empty, return the same
        # test matrix as before so that all tests can be run normally
        return test_matrix
    else:
-        msg = f"Found {test_config_labels} on the PR so only these test configs are run"
-        info(msg)
        # When the filter test matrix contain matches or if a valid test config label
        # is found in the PR, return the filtered test matrix
        return filtered_test_matrix


-def filter_selected_test_configs(
-    test_matrix: Dict[str, List[Any]], selected_test_configs: Set[str]
-) -> Dict[str, List[Any]]:
-    """
-    Keep only the selected configs if the list is not empty. Otherwise, keep all test configs.
-    This filter is used when the workflow is dispatched manually.
-    """
-    if not selected_test_configs:
-        return test_matrix
-
-    filtered_test_matrix: Dict[str, List[Any]] = {"include": []}
-    for entry in test_matrix.get("include", []):
-        config_name = entry.get("config", "")
-        if not config_name:
-            continue
-
-        if config_name in selected_test_configs:
-            filtered_test_matrix["include"].append(entry)
-
-    return filtered_test_matrix
-
-
 def set_periodic_modes(
    test_matrix: Dict[str, List[Any]], job_name: Optional[str]
 ) -> Dict[str, List[Any]]:
@ -385,33 +374,30 @@ def process_jobs(
        # - If the target record has the job (config) name, only that test config
        #   will be skipped or marked as unstable
        if not target_job_cfg:
-            msg = (
+            print(
                f"Issue {target_url} created by {author} has {issue_type.value} "
                + f"all CI jobs for {workflow} / {job_name}"
            )
-            info(msg)
            return _filter_jobs(
                test_matrix=test_matrix,
                issue_type=issue_type,
            )

        if target_job_cfg == BUILD_JOB_NAME:
-            msg = (
+            print(
                f"Issue {target_url} created by {author} has {issue_type.value} "
                + f"the build job for {workflow} / {job_name}"
            )
-            info(msg)
            return _filter_jobs(
                test_matrix=test_matrix,
                issue_type=issue_type,
            )

        if target_job_cfg in (TEST_JOB_NAME, BUILD_AND_TEST_JOB_NAME):
-            msg = (
+            print(
                f"Issue {target_url} created by {author} has {issue_type.value} "
                + f"all the test jobs for {workflow} / {job_name}"
            )
-            info(msg)
            return _filter_jobs(
                test_matrix=test_matrix,
                issue_type=issue_type,
@ -477,7 +463,7 @@ def parse_reenabled_issues(s: Optional[str]) -> List[str]:


 def get_reenabled_issues(pr_body: str = "") -> List[str]:
-    default_branch = f"origin/{os.environ.get('GIT_DEFAULT_BRANCH', 'main')}"
+    default_branch = os.getenv("GIT_DEFAULT_BRANCH", "main")
    try:
        commit_messages = subprocess.check_output(
            f"git cherry -v {default_branch}".split(" ")
@ -508,15 +494,10 @@ def perform_misc_tasks(
        "ci-no-test-timeout", check_for_setting(labels, pr_body, "ci-no-test-timeout")
    )
    set_output("ci-no-td", check_for_setting(labels, pr_body, "ci-no-td"))
-    # Only relevant for the one linux distributed cuda job, delete this when TD
-    # is rolled out completely
-    set_output(
-        "ci-td-distributed", check_for_setting(labels, pr_body, "ci-td-distributed")
-    )

    # Obviously, if the job name includes unstable, then this is an unstable job
    is_unstable = job_name and IssueType.UNSTABLE.value in job_name
-    if not is_unstable and test_matrix and test_matrix.get("include"):
+    if not is_unstable and test_matrix:
        # Even when the job name doesn't mention unstable, we will also mark it as
        # unstable when the test matrix only includes unstable jobs. Basically, this
        # logic allows build or build-and-test jobs to be marked as unstable too.
@ -586,16 +567,6 @@ def main() -> None:
        # No PR number, no tag, we can just return the test matrix as it is
        filtered_test_matrix = test_matrix

-    if args.selected_test_configs:
-        selected_test_configs = {
-            v.strip().lower()
-            for v in args.selected_test_configs.split(",")
-            if v.strip()
-        }
-        filtered_test_matrix = filter_selected_test_configs(
-            filtered_test_matrix, selected_test_configs
-        )
-
    if args.event_name == "schedule" and args.schedule == "29 8 * * *":
        # we don't want to run the mem leak check or disabled tests on normal
        # periodically scheduled jobs, only the ones at this time
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@ -13,16 +13,16 @@ architectures:
 import os
 from typing import Dict, List, Optional, Tuple

-CUDA_ARCHES = ["11.8", "12.1", "12.4"]
+CUDA_ARCHES = ["11.8", "12.1"]


-CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1", "12.4": "12.4.0"}
+CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1"}


-CUDA_ARCHES_CUDNN_VERSION = {"11.8": "8", "12.1": "8", "12.4": "8"}
+CUDA_ARCHES_CUDNN_VERSION = {"11.8": "8", "12.1": "8"}


-ROCM_ARCHES = ["6.0", "6.1"]
+ROCM_ARCHES = ["5.7", "6.0"]


 CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
@ -31,9 +31,6 @@ CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]
 CPU_AARCH64_ARCH = ["cpu-aarch64"]


-CPU_S390X_ARCH = ["cpu-s390x"]
-
-
 PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
    "11.8": (
        "nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "  # noqa: B950
@ -61,20 +58,6 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
        "nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
    ),
-    "12.4": (
-        "nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cudnn-cu12==8.9.7.29; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'"
-    ),
 }


@ -133,8 +116,6 @@ def arch_type(arch_version: str) -> str:
        return "cpu-cxx11-abi"
    elif arch_version in CPU_AARCH64_ARCH:
        return "cpu-aarch64"
-    elif arch_version in CPU_S390X_ARCH:
-        return "cpu-s390x"
    else:  # arch_version should always be "cpu" in this case
        return "cpu"

@ -154,7 +135,6 @@ WHEEL_CONTAINER_IMAGES = {
    "cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
    "cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
    "cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
-    "cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
 }

 CONDA_CONTAINER_IMAGES = {
@ -211,7 +191,6 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
        "cpu": "cpu",
        "cpu-aarch64": "cpu",
        "cpu-cxx11-abi": "cpu-cxx11-abi",
-        "cpu-s390x": "cpu",
        "cuda": f"cu{gpu_arch_version.replace('.', '')}",
        "rocm": f"rocm{gpu_arch_version}",
    }.get(gpu_arch_type, gpu_arch_version)
@ -313,8 +292,8 @@ def generate_wheels_matrix(
    python_versions: Optional[List[str]] = None,
 ) -> List[Dict[str, str]]:
    package_type = "wheel"
-    if os == "linux" or os == "linux-aarch64" or os == "linux-s390x":
-        # NOTE: We only build manywheel packages for x86_64 and aarch64 and s390x linux
+    if os == "linux" or os == "linux-aarch64":
+        # NOTE: We only build manywheel packages for x86_64 and aarch64 linux
        package_type = "manywheel"

    if python_versions is None:
@ -331,10 +310,6 @@ def generate_wheels_matrix(
            # Only want the one arch as the CPU type is different and
            # uses different build/test scripts
            arches = ["cpu-aarch64"]
-        elif os == "linux-s390x":
-            # Only want the one arch as the CPU type is different and
-            # uses different build/test scripts
-            arches = ["cpu-s390x"]

    ret: List[Dict[str, str]] = []
    for python_version in python_versions:
@ -345,12 +320,11 @@ def generate_wheels_matrix(
                if arch_version == "cpu"
                or arch_version == "cpu-cxx11-abi"
                or arch_version == "cpu-aarch64"
-                or arch_version == "cpu-s390x"
                else arch_version
            )

            # 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
-            if arch_version in ["12.4", "12.1", "11.8"] and os == "linux":
+            if arch_version in ["12.1", "11.8"] and os == "linux":
                ret.append(
                    {
                        "python_version": python_version,
@ -393,6 +367,5 @@ def generate_wheels_matrix(
    return ret


-validate_nccl_dep_consistency("12.4")
 validate_nccl_dep_consistency("12.1")
 validate_nccl_dep_consistency("11.8")
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
@ -95,7 +95,6 @@ class OperatingSystem:
    MACOS = "macos"
    MACOS_ARM64 = "macos-arm64"
    LINUX_AARCH64 = "linux-aarch64"
-    LINUX_S390X = "linux-s390x"


 LINUX_BINARY_BUILD_WORFKLOWS = [
@ -333,20 +332,6 @@ AARCH64_BINARY_BUILD_WORKFLOWS = [
    ),
 ]

-S390X_BINARY_BUILD_WORKFLOWS = [
-    BinaryBuildWorkflow(
-        os=OperatingSystem.LINUX_S390X,
-        package_type="manywheel",
-        build_configs=generate_binary_build_matrix.generate_wheels_matrix(
-            OperatingSystem.LINUX_S390X
-        ),
-        ciflow_config=CIFlowConfig(
-            labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
-            isolated_workflow=True,
-        ),
-    ),
-]
-

 def main() -> None:
    jinja_env = jinja2.Environment(
@ -365,10 +350,6 @@ def main() -> None:
            jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
            AARCH64_BINARY_BUILD_WORKFLOWS,
        ),
-        (
-            jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
-            S390X_BINARY_BUILD_WORKFLOWS,
-        ),
        (
            jinja_env.get_template("linux_binary_build_workflow.yml.j2"),
            LINUX_BINARY_SMOKE_WORKFLOWS,
@ -397,9 +378,7 @@ def main() -> None:
    for template, workflows in template_and_workflows:
        # added Iterable check to appease the mypy gods
        if not isinstance(workflows, Iterable):
-            raise Exception(  # noqa: TRY002
-                f"How is workflows not iterable? {workflows}"
-            )  # noqa: TRY002
+            raise Exception(f"How is workflows not iterable? {workflows}")
        for workflow in workflows:
            workflow.generate_workflow_file(workflow_template=template)

--- a/.github/scripts/generate_docker_release_matrix.py
+++ b/.github/scripts/generate_docker_release_matrix.py
@ -21,8 +21,6 @@ DOCKER_IMAGE_TYPES = ["runtime", "devel"]

 def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
    ret: List[Dict[str, str]] = []
-    # CUDA amd64 Docker images are available as both runtime and devel while
-    # CPU arm64 image is only available as runtime.
    for cuda, version in generate_binary_build_matrix.CUDA_ARCHES_FULL_VERSION.items():
        for image in DOCKER_IMAGE_TYPES:
            ret.append(
@ -33,19 +31,9 @@ def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
                        cuda
                    ],
                    "image_type": image,
-                    "platform": "linux/amd64",
+                    "platform": "linux/arm64,linux/amd64",
                }
            )
-    ret.append(
-        {
-            "cuda": "cpu",
-            "cuda_full_version": "",
-            "cudnn_version": "",
-            "image_type": "runtime",
-            "platform": "linux/arm64",
-        }
-    )
-
    return {"include": ret}


--- a/.github/scripts/get_workflow_job_id.py
+++ b/.github/scripts/get_workflow_job_id.py
@ -4,7 +4,6 @@

 import argparse
 import json
-import operator
 import os
 import re
 import sys
@ -127,7 +126,7 @@ def find_job_id_name(args: Any) -> Tuple[str, str]:

    # Sort the jobs list by start time, in descending order. We want to get the most
    # recently scheduled job on the runner.
-    jobs.sort(key=operator.itemgetter("started_at"), reverse=True)
+    jobs.sort(key=lambda job: job["started_at"], reverse=True)

    for job in jobs:
        if job["runner_name"] == args.runner_name:
--- a/.github/scripts/get_workflow_type.py
+++ b/.github/scripts/get_workflow_type.py
@ -1,99 +0,0 @@
-import json
-from argparse import ArgumentParser
-from typing import Any
-
-from github import Auth, Github
-from github.Issue import Issue
-
-
-WORKFLOW_TYPE_LABEL = "label"
-WORKFLOW_TYPE_RG = "rg"
-WORKFLOW_TYPE_BOTH = "both"
-
-
-def parse_args() -> Any:
-    parser = ArgumentParser("Get dynamic rollout settings")
-    parser.add_argument("--github-token", type=str, required=True, help="GitHub token")
-    parser.add_argument(
-        "--github-repo",
-        type=str,
-        required=False,
-        default="pytorch/test-infra",
-        help="GitHub repo to get the issue",
-    )
-    parser.add_argument(
-        "--github-issue", type=int, required=True, help="GitHub issue umber"
-    )
-    parser.add_argument(
-        "--github-user", type=str, required=True, help="GitHub username"
-    )
-    parser.add_argument(
-        "--github-branch", type=str, required=True, help="Current GitHub branch"
-    )
-
-    return parser.parse_args()
-
-
-def get_gh_client(github_token: str) -> Github:
-    auth = Auth.Token(github_token)
-    return Github(auth=auth)
-
-
-def get_issue(gh: Github, repo: str, issue_num: int) -> Issue:
-    repo = gh.get_repo(repo)
-    return repo.get_issue(number=issue_num)
-
-
-def is_exception_branch(branch: str) -> bool:
-    return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
-
-
-def get_workflow_type(issue: Issue, username: str) -> str:
-    user_list = issue.get_comments()[0].body.split("\r\n")
-    try:
-        run_option = issue.get_comments()[1].body.split("\r\n")[0]
-    except Exception as e:
-        run_option = "single"
-
-    if user_list[0] == "!":
-        # Use old runners for everyone
-        return WORKFLOW_TYPE_LABEL
-    elif user_list[1] == "*":
-        if run_option == WORKFLOW_TYPE_BOTH:
-            # Use ARC runners and old runners for everyone
-            return WORKFLOW_TYPE_BOTH
-        else:
-            # Use only ARC runners for everyone
-            return WORKFLOW_TYPE_RG
-    elif username in user_list:
-        if run_option == WORKFLOW_TYPE_BOTH:
-            # Use ARC runners and old runners for a specific user
-            return WORKFLOW_TYPE_BOTH
-        else:
-            # Use only ARC runners for a specific user
-            return WORKFLOW_TYPE_RG
-    else:
-        # Use old runners by default
-        return WORKFLOW_TYPE_LABEL
-
-
-def main() -> None:
-    args = parse_args()
-
-    if is_exception_branch(args.github_branch):
-        output = {"workflow_type": WORKFLOW_TYPE_LABEL}
-    else:
-        try:
-            gh = get_gh_client(args.github_token)
-            issue = get_issue(gh, args.github_repo, args.github_issue)
-
-            output = {"workflow_type": get_workflow_type(issue, args.github_user)}
-        except Exception as e:
-            output = {"workflow_type": WORKFLOW_TYPE_LABEL}
-
-    json_output = json.dumps(output)
-    print(json_output)
-
-
-if __name__ == "__main__":
-    main()
--- a/.github/scripts/gql_mocks.json.gz
+++ b/.github/scripts/gql_mocks.json.gz
--- a/.github/scripts/lintrunner.sh
+++ b/.github/scripts/lintrunner.sh
@ -6,9 +6,6 @@ CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 eval "$(command conda 'shell.bash' 'hook' 2> /dev/null)"
 conda activate "${CONDA_ENV}"

-# Use uv to speed up lintrunner init
-python3 -m pip install uv
-
 CACHE_DIRECTORY="/tmp/.lintbin"
 # Try to recover the cached binaries
 if [[ -d "${CACHE_DIRECTORY}" ]]; then
--- a/.github/scripts/td_llm_indexer.sh
+++ b/.github/scripts/td_llm_indexer.sh
@ -1,20 +0,0 @@
-#!/bin/bash
-
-set -euxo pipefail
-
-# Download requirements
-cd llm-target-determinator
-pip install -q -r requirements.txt
-cd ../codellama
-pip install -e .
-
-# Run indexer
-cd ../llm-target-determinator
-
-torchrun \
-    --standalone \
-    --nnodes=1 \
-    --nproc-per-node=1 \
-    indexer.py \
-    --experiment-name indexer-files \
-    --granularity FILE
--- a/.github/scripts/test_filter_test_configs.py
+++ b/.github/scripts/test_filter_test_configs.py
@ -9,7 +9,6 @@ from unittest import main, mock, TestCase
 import yaml
 from filter_test_configs import (
    filter,
-    filter_selected_test_configs,
    get_labels,
    mark_unstable_jobs,
    parse_reenabled_issues,
@ -18,6 +17,7 @@ from filter_test_configs import (
    remove_disabled_jobs,
    set_periodic_modes,
    SUPPORTED_PERIODICAL_MODES,
+    VALID_TEST_CONFIG_LABELS,
 )


@ -273,13 +273,13 @@ class TestConfigFilter(TestCase):
        testcases = [
            {
                "test_matrix": '{include: [{config: "default", runner: "linux"}]}',
-                "expected": '{"include": []}',
-                "description": "Request test-config/cfg but the test matrix doesn't have it",
+                "expected": '{"include": [{"config": "default", "runner": "linux"}]}',
+                "description": "No match, keep the same test matrix",
            },
            {
                "test_matrix": '{include: [{config: "default", runner: "linux"}, {config: "plain-cfg"}]}',
-                "expected": '{"include": []}',
-                "description": "A valid test config label needs to start with test-config/",
+                "expected": '{"include": [{"config": "default", "runner": "linux"}, {"config": "plain-cfg"}]}',
+                "description": "No match because there is no prefix or suffix, keep the same test matrix",
            },
            {
                "test_matrix": '{include: [{config: "default", runner: "linux"}, {config: "cfg", shard: 1}]}',
@ -294,8 +294,9 @@ class TestConfigFilter(TestCase):
            )
            self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))

-    def test_filter_with_test_config_label(self) -> None:
+    def test_filter_with_valid_label(self) -> None:
        mocked_labels = {f"{PREFIX}cfg", "ciflow/trunk"}
+        VALID_TEST_CONFIG_LABELS.add(f"{PREFIX}cfg")

        testcases = [
            {
@ -316,51 +317,6 @@ class TestConfigFilter(TestCase):
            )
            self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))

-    def test_filter_selected_test_configs(self) -> None:
-        testcases = [
-            {
-                "test_matrix": '{include: [{config: "default"}]}',
-                "selected_test_configs": "",
-                "expected": '{"include": [{"config": "default"}]}',
-                "description": "No selected test configs",
-            },
-            {
-                "test_matrix": '{include: [{config: "default"}]}',
-                "selected_test_configs": "foo",
-                "expected": '{"include": []}',
-                "description": "A different test config is selected",
-            },
-            {
-                "test_matrix": '{include: [{config: "default"}]}',
-                "selected_test_configs": "foo, bar",
-                "expected": '{"include": []}',
-                "description": "A different set of test configs is selected",
-            },
-            {
-                "test_matrix": '{include: [{config: "default"}]}',
-                "selected_test_configs": "foo, bar,default",
-                "expected": '{"include": [{"config": "default"}]}',
-                "description": "One of the test config is selected",
-            },
-            {
-                "test_matrix": '{include: [{config: "default"}, {config: "bar"}]}',
-                "selected_test_configs": "foo, bar,Default",
-                "expected": '{"include": [{"config": "default"}, {"config": "bar"}]}',
-                "description": "Several test configs are selected",
-            },
-        ]
-
-        for case in testcases:
-            selected_test_configs = {
-                v.strip().lower()
-                for v in case["selected_test_configs"].split(",")
-                if v.strip()
-            }
-            filtered_test_matrix = filter_selected_test_configs(
-                yaml.safe_load(case["test_matrix"]), selected_test_configs
-            )
-            self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
-
    def test_set_periodic_modes(self) -> None:
        testcases: List[Dict[str, str]] = [
            {
@ -685,7 +641,6 @@ class TestConfigFilter(TestCase):
            ci_verbose_test_logs: bool = False,
            ci_no_test_timeout: bool = False,
            ci_no_td: bool = False,
-            ci_td_distributed: bool = False,
            is_unstable: bool = False,
            reenabled_issues: str = "",
        ) -> str:
@ -694,7 +649,6 @@ class TestConfigFilter(TestCase):
                f"ci-verbose-test-logs={ci_verbose_test_logs}\n"
                f"ci-no-test-timeout={ci_no_test_timeout}\n"
                f"ci-no-td={ci_no_td}\n"
-                f"ci-td-distributed={ci_td_distributed}\n"
                f"is-unstable={is_unstable}\n"
                f"reenabled-issues={reenabled_issues}\n"
            )
--- a/.github/scripts/test_trymerge.py
+++ b/.github/scripts/test_trymerge.py
@ -205,6 +205,7 @@ def mocked_read_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule
            approved_by=["pytorch/metamates", "ngimel"],
            mandatory_checks_name=[
                "Lint",
+                "Facebook CLA Check",
                "pull / linux-xenial-cuda11.3-py3.7-gcc7 / build",
            ],
            ignore_flaky_failures=True,
@ -397,7 +398,7 @@ class TestTryMerge(TestCase):
    def test_gql_retrieve_checksuites(self, *args: Any) -> None:
        "Fetch comments and conclusions for PR with 60 commits"
        pr = GitHubPR("pytorch", "pytorch", 94787)
-        self.assertEqual(len(pr.get_checkrun_conclusions()), 182)
+        self.assertEqual(len(pr.get_checkrun_conclusions()), 183)

    def test_team_members(self, *args: Any) -> None:
        "Test fetching team members works"
@ -741,30 +742,6 @@ class TestBypassFailures(TestCase):
        self.assertTrue(len(failed) == 0)
        self.assertTrue(len(ignorable["UNSTABLE"]) == 1)

-        # Add another test case where there is no unstable keyword in the job name, but
-        # the job has already been marked as unstable
-        pr = GitHubPR("pytorch", "executorch", 3318)
-        checks = pr.get_checkrun_conclusions()
-        checks = get_classifications(
-            pr.pr_num,
-            pr.project,
-            checks,
-            [],
-        )
-        print(checks)
-        workflow_name = "test-llama-app"
-        job_name = "mobile-job (android)"
-        self.assertTrue(
-            checks[f"Android / {workflow_name} / {job_name}"].classification
-            == "UNSTABLE"
-        )
-        pending, failed, ignorable = categorize_checks(
-            checks, list(checks.keys()), ok_failed_checks_threshold=1
-        )
-        self.assertTrue(len(pending) == 0)
-        self.assertTrue(len(failed) == 0)
-        self.assertTrue(len(ignorable["UNSTABLE"]) == 1)
-
    def test_get_classifications_broken_trunk(self, *args: Any) -> None:
        # The mock merge base is the actual value returned by gh_fetch_merge_base
        test_cases = [
@ -856,41 +833,6 @@ class TestBypassFailures(TestCase):
        self.assertTrue(len(ignorable["FLAKY"]) == 4)
        self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 2)

-    def test_get_classifications_wrong_workflow_name(self, *args: Any) -> None:
-        pr = GitHubPR("pytorch", "pytorch", 123104)
-        checks = pr.get_checkrun_conclusions()
-
-        check_name = "linux-binary-conda / conda-py3_8-cuda11_8-build / build"
-        check_name_workflow_path = ".github/workflows/generated-linux-binary-conda-nightly.yml / conda-py3_8-cuda11_8-build / build"
-
-        # Mock a check where the workflow name uses the full path
-        checks[check_name_workflow_path] = JobCheckState(
-            check_name_workflow_path,
-            checks[check_name].url,
-            checks[check_name].status,
-            checks[check_name].classification,
-            checks[check_name].job_id,
-            checks[check_name].title,
-            checks[check_name].summary,
-        )
-        del checks[check_name]
-
-        checks = get_classifications(
-            pr.pr_num,
-            pr.project,
-            checks,
-            [],
-        )
-        pending, failed, ignorable = categorize_checks(
-            checks,
-            list(checks.keys()),
-        )
-
-        self.assertTrue(len(pending) == 0)
-        self.assertTrue(len(failed) == 0)
-        self.assertTrue(len(ignorable["FLAKY"]) == 1)
-        self.assertTrue(len(ignorable["BROKEN_TRUNK"]) == 0)
-
    @mock.patch("trymerge.read_merge_rules", side_effect=xla_merge_rules)
    def test_dont_ignore_flaky_failures(self, *args: Any) -> None:
        """
--- a/.github/scripts/trymerge.py
+++ b/.github/scripts/trymerge.py
@ -123,7 +123,6 @@ fragment PRCheckSuites on CheckSuiteConnection {
        workflow {
          name
        }
-        databaseId
        url
      }
      checkRuns(first: 50) {
@ -1399,10 +1398,7 @@ def find_matching_merge_rule(
        )
        required_checks = list(
            filter(
-                lambda x: ("EasyCLA" in x)
-                or ("Facebook CLA Check" in x)
-                or not skip_mandatory_checks,
-                mandatory_checks,
+                lambda x: "EasyCLA" in x or not skip_mandatory_checks, mandatory_checks
            )
        )
        pending_checks, failed_checks, _ = categorize_checks(
@ -1413,13 +1409,6 @@ def find_matching_merge_rule(
            else 0,
        )

-        # categorize_checks assumes all tests are required if required_checks is empty.
-        # this is a workaround as we want to keep that behavior for categorize_checks
-        # generally.
-        if not required_checks:
-            pending_checks = []
-            failed_checks = []
-
        hud_link = f"https://hud.pytorch.org/{pr.org}/{pr.project}/commit/{pr.last_commit()['oid']}"
        if len(failed_checks) > 0:
            if reject_reason_score < 30000:
@ -1619,59 +1608,28 @@ def remove_job_name_suffix(name: str, replacement: str = ")") -> str:


 def is_broken_trunk(
-    check: JobCheckState,
+    name: str,
    drci_classifications: Any,
 ) -> bool:
-    if not check or not drci_classifications:
+    if not name or not drci_classifications:
        return False

-    name = check.name
-    job_id = check.job_id
-
    # Consult the list of broken trunk failures from Dr.CI
    return any(
-        (name == broken_trunk["name"]) or (job_id and job_id == broken_trunk["id"])
+        name == broken_trunk["name"]
        for broken_trunk in drci_classifications.get("BROKEN_TRUNK", [])
    )


-def is_unstable(
-    check: JobCheckState,
-    drci_classifications: Any,
-) -> bool:
-    if not check or not drci_classifications:
-        return False
-
-    name = check.name
-    job_id = check.job_id
-
-    # The job name has the unstable keyword. This is the original way to mark a job
-    # as unstable on HUD, Dr.CI, and trymerge
-    if "unstable" in name:
-        return True
-
-    # Consult the list of unstable failures from Dr.CI
-    return any(
-        (name == unstable["name"] or (job_id and job_id == unstable["id"]))
-        for unstable in drci_classifications.get("UNSTABLE", [])
-    )
-
-
 def is_flaky(
-    check: JobCheckState,
+    name: str,
    drci_classifications: Any,
 ) -> bool:
-    if not check or not drci_classifications:
+    if not name or not drci_classifications:
        return False

-    name = check.name
-    job_id = check.job_id
-
    # Consult the list of flaky failures from Dr.CI
-    return any(
-        (name == flaky["name"] or (job_id and job_id == flaky["id"]))
-        for flaky in drci_classifications.get("FLAKY", [])
-    )
+    return any(name == flaky["name"] for flaky in drci_classifications.get("FLAKY", []))


 def is_invalid_cancel(
@ -1744,7 +1702,7 @@ def get_classifications(
        if check.status == "SUCCESS" or check.status == "NEUTRAL":
            continue

-        if is_unstable(check, drci_classifications):
+        if "unstable" in name:
            checks_with_classifications[name] = JobCheckState(
                check.name,
                check.url,
@ -1758,7 +1716,7 @@ def get_classifications(

        # NB: It's important to note that when it comes to ghstack and broken trunk classification,
        # Dr.CI uses the base of the whole stack
-        if is_broken_trunk(check, drci_classifications):
+        if is_broken_trunk(name, drci_classifications):
            checks_with_classifications[name] = JobCheckState(
                check.name,
                check.url,
@ -1770,7 +1728,7 @@ def get_classifications(
            )
            continue

-        elif is_flaky(check, drci_classifications):
+        elif is_flaky(name, drci_classifications):
            checks_with_classifications[name] = JobCheckState(
                check.name,
                check.url,
--- a/.github/scripts/tryrebase.py
+++ b/.github/scripts/tryrebase.py
@ -60,7 +60,7 @@ def rebase_onto(
    repo._run_git("rebase", onto_branch, branch)

    if repo.rev_parse(branch) == repo.rev_parse(onto_branch):
-        raise Exception(SAME_SHA_ERROR)  # noqa: TRY002
+        raise Exception(SAME_SHA_ERROR)

    if dry_run:
        push_result = repo._run_git("push", "--dry-run", "-f", remote_url, refspec)
@ -100,7 +100,7 @@ def rebase_ghstack_onto(
    repo._run_git("rebase", onto_branch, orig_ref)

    if repo.rev_parse(orig_ref) == repo.rev_parse(onto_branch):
-        raise Exception(SAME_SHA_ERROR)  # noqa: TRY002
+        raise Exception(SAME_SHA_ERROR)

    # steal the identity of the committer of the commit on the orig branch
    email = repo._run_git("log", orig_ref, "--pretty=format:%ae", "-1")
@ -126,7 +126,7 @@ def rebase_ghstack_onto(
        print(push_result)
        if ghstack_result.returncode != 0:
            print(ghstack_result.stderr.decode("utf-8"))
-            raise Exception(f"\n```{push_result}```")  # noqa: TRY002
+            raise Exception(f"\n```{push_result}```")
        # The contents of a successful push result should look like:
        # Summary of changes (ghstack 0.6.0)

--- a/.github/templates/common.yml.j2
+++ b/.github/templates/common.yml.j2
@ -8,7 +8,7 @@
 # NOTE: If testing pytorch/builder changes you can change this variable to change what pytorch/builder reference
 #       the binary builds will check out
 {%- set builder_repo = "pytorch/builder" -%}
-{%- set builder_branch = "main" -%}
+{%- set builder_branch = "release/2.3" -%}

 {%- macro concurrency(build_environment) -%}
 concurrency:
--- a/.github/templates/linux_binary_build_workflow.yml.j2
+++ b/.github/templates/linux_binary_build_workflow.yml.j2
@ -33,8 +33,6 @@ env:
  # Needed for conda builds
  {%- if "aarch64" in build_environment %}
  ALPINE_IMAGE: "arm64v8/alpine"
-  {%- elif "s390x" in build_environment %}
-  ALPINE_IMAGE: "docker.io/s390x/alpine"
  {%- else %}
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  {%- endif %}
@ -48,7 +46,7 @@ env:
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
  PYTORCH_ROOT: /pytorch
  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  SKIP_ALL_TESTS: 0
+  SKIP_ALL_TESTS: 1
 !{{ common.concurrency(build_environment) }}

 jobs:
@ -60,9 +58,6 @@ jobs:
      {%- if "aarch64" in build_environment %}
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
-      {%- elif "s390x" in build_environment %}
-      runs_on: linux.s390x
-      ALPINE_IMAGE: "docker.io/s390x/alpine"
      {%- elif "conda" in build_environment and config["gpu_arch_type"] == "cuda" %}
      runs_on: linux.24xlarge
      {%- endif %}
@ -84,9 +79,6 @@ jobs:
      {%- if "aarch64" in build_environment %}
      runs_on: linux.arm64.2xlarge
      ALPINE_IMAGE: "arm64v8/alpine"
-      {%- elif "s390x" in build_environment %}
-      runs_on: linux.s390x
-      ALPINE_IMAGE: "docker.io/s390x/alpine"
      {%- elif config["gpu_arch_type"] == "rocm" %}
      runs_on: linux.rocm.gpu
      {%- elif config["gpu_arch_type"] == "cuda" %}
@ -108,8 +100,8 @@ jobs:
        with:
          name: !{{ config["build_name"] }}
          path: "${{ runner.temp }}/artifacts/"
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
      - name: ROCm set GPU_FLAG
        run: |
          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
--- a/.github/templates/macos_binary_build_workflow.yml.j2
+++ b/.github/templates/macos_binary_build_workflow.yml.j2
@ -48,7 +48,7 @@ env:
  BUILD_ENVIRONMENT: !{{ build_environment }}
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
-  SKIP_ALL_TESTS: 0
+  SKIP_ALL_TESTS: 1
 {%- if cross_compile_arm64 %}
  CROSS_COMPILE_ARM64: 1
 {% endif %}
@ -81,8 +81,8 @@ jobs:
          elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
            echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
          fi
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
      - name: Install sccache (only for non-forked PRs, and pushes to trunk)
        uses: nick-fields/retry@v2.8.2
        if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
--- a/.github/templates/windows_binary_build_workflow.yml.j2
+++ b/.github/templates/windows_binary_build_workflow.yml.j2
@ -65,8 +65,8 @@ jobs:
    steps:
      !{{ common.setup_ec2_windows() }}
      !{{ set_runner_specific_vars() }}
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
      - name: Populate binary env
        shell: bash
        run: |
@ -105,8 +105,8 @@ jobs:
        with:
          name: !{{ config["build_name"] }}
          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
+      !{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
      - name: Populate binary env
        shell: bash
        run: |
--- a/.github/workflows/_android-build-test.yml
+++ b/.github/workflows/_android-build-test.yml
@ -37,7 +37,7 @@ jobs:
      keep-going: ${{ steps.filter.outputs.keep-going }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          fetch-depth: 1
          submodules: false
@ -59,25 +59,25 @@ jobs:
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -141,5 +141,5 @@ jobs:
        if: always()

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
        if: always()
--- a/.github/workflows/_android-full-build-test.yml
+++ b/.github/workflows/_android-full-build-test.yml
@ -37,7 +37,7 @@ jobs:
      keep-going: ${{ steps.filter.outputs.keep-going }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          fetch-depth: 1
          submodules: false
@ -59,25 +59,25 @@ jobs:
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -186,5 +186,5 @@ jobs:
        if: always()

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
        if: always()
--- a/.github/workflows/_bazel-build-test.yml
+++ b/.github/workflows/_bazel-build-test.yml
@ -42,7 +42,7 @@ jobs:
      reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          fetch-depth: 1
          submodules: false
@ -64,36 +64,31 @@ jobs:
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

-      - name: Check if in a ARC runner
-        shell: bash
-        id: check_arc_runner
-        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
-
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-        if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.3
+        if: ${{ inputs.cuda-version != 'cpu' }}

      - name: Output disk space left
        run: |
@ -201,5 +196,5 @@ jobs:
          file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
        if: always()
--- a/.github/workflows/_binary-build-linux.yml
+++ b/.github/workflows/_binary-build-linux.yml
@ -139,23 +139,20 @@ jobs:
        run: env

      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
-        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        continue-on-error: true
        with:
          github-secret: ${{ secrets.github-token }}

      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
-          no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}
+          no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' }}

      - name: Setup Linux
-        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
        uses: ./.github/actions/setup-linux

      - name: Chown workspace
-        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
        uses: ./.github/actions/chown-workspace
        with:
          ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
@ -168,7 +165,7 @@ jobs:
          rm -rf "${GITHUB_WORKSPACE}"
          mkdir "${GITHUB_WORKSPACE}"

-          if [[ ${{ inputs.build_environment }} == 'linux-aarch64-binary-manywheel' ]] || [[ ${{ inputs.build_environment }} == 'linux-s390x-binary-manywheel' ]] ; then
+          if [[ ${{ inputs.build_environment }} == 'linux-aarch64-binary-manywheel' ]]; then
            rm -rf "${RUNNER_TEMP}/artifacts"
            mkdir "${RUNNER_TEMP}/artifacts"
          fi
@ -176,7 +173,6 @@ jobs:
      - name: Checkout PyTorch to pytorch dir
        uses: malfet/checkout@silent-checkout
        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
          quiet-checkout: true
@ -190,7 +186,7 @@ jobs:
      - name: Checkout pytorch/builder to builder dir
        uses: malfet/checkout@silent-checkout
        with:
-          ref: main
+          ref: release/2.3
          submodules: recursive
          repository: pytorch/builder
          path: builder
@ -215,8 +211,8 @@ jobs:
            ]}

      - name: Pull Docker image
-        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ inputs.DOCKER_IMAGE }}

@ -257,7 +253,7 @@ jobs:
          fi

      - name: Chown artifacts
-        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
+        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
        shell: bash
        run: |
          # Ensure the working directory gets chowned back to the current user
@ -272,20 +268,11 @@ jobs:
            ${{ runner.temp }}/artifacts/*

      - name: Teardown Linux
-        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        if: always()
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3

      - name: Chown workspace
-        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
+        if: always()
        uses: ./pytorch/.github/actions/chown-workspace
        with:
          ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
-
-      - name: Cleanup docker
-        if: always() && inputs.build_environment == 'linux-s390x-binary-manywheel'
-        shell: bash
-        run: |
-          # on s390x stop the container for clean worker stop
-          # ignore expansion of "docker ps -q" since it could be empty
-          # shellcheck disable=SC2046
-          docker stop $(docker ps -q) || true
--- a/.github/workflows/_binary-test-linux.yml
+++ b/.github/workflows/_binary-test-linux.yml
@ -127,24 +127,21 @@ jobs:
          } >> "${GITHUB_ENV} }}"

      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
-        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        continue-on-error: true
        with:
          github-secret: ${{ secrets.github-token }}

        # Setup the environment
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
-          no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}
+          no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' }}

      - name: Setup Linux
-        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
        uses: ./.github/actions/setup-linux

      - name: Chown workspace
-        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
        uses: ./.github/actions/chown-workspace
        with:
          ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
@ -158,7 +155,6 @@ jobs:
      - name: Checkout PyTorch to pytorch dir
        uses: malfet/checkout@silent-checkout
        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch

@ -171,7 +167,7 @@ jobs:
      - name: Checkout pytorch/builder to builder dir
        uses: malfet/checkout@silent-checkout
        with:
-          ref: main
+          ref: release/2.3
          submodules: recursive
          repository: pytorch/builder
          path: builder
@ -202,12 +198,12 @@ jobs:
          path: "${{ runner.temp }}/artifacts/"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.3
        if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}

      - name: Pull Docker image
-        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ inputs.DOCKER_IMAGE }}

@ -216,11 +212,11 @@ jobs:
        uses: ./pytorch/.github/actions/test-pytorch-binary

      - name: Teardown Linux
-        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        if: always()
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3

      - name: Chown workspace
-        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
+        if: always()
        uses: ./pytorch/.github/actions/chown-workspace
        with:
          ALPINE_IMAGE: ${{ inputs.ALPINE_IMAGE }}
--- a/.github/workflows/_binary-upload.yml
+++ b/.github/workflows/_binary-upload.yml
@ -95,7 +95,7 @@ jobs:
      SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          no-sudo: true

--- a/.github/workflows/_buck-build-test.yml
+++ b/.github/workflows/_buck-build-test.yml
@ -23,7 +23,7 @@ jobs:
      keep-going: ${{ steps.filter.outputs.keep-going }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          fetch-depth: 1
          submodules: false
@ -44,7 +44,7 @@ jobs:
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Set up JDK 8
        uses: actions/setup-java@v3
@ -53,7 +53,7 @@ jobs:
          distribution: 'temurin'

      - name: Setup miniconda
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.3
        with:
          python-version: 3.8
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
--- a/.github/workflows/_docs.yml
+++ b/.github/workflows/_docs.yml
@ -28,21 +28,7 @@ on:
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
-      s3-bucket:
-        description: S3 bucket to download artifact
-        required: false
-        type: string
-        default: "gha-artifacts"
-      aws-role-to-assume:
-        description: role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
-      upload-aws-role-to-assume:
-        description: role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
+
    secrets:
      GH_PYTORCHBOT_TOKEN:
        required: false
@ -80,7 +66,7 @@ jobs:
    name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
@ -91,27 +77,19 @@ jobs:

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

-      - name: configure aws credentials
-        if : ${{ inputs.aws-role-to-assume != '' }}
-        uses: aws-actions/configure-aws-credentials@v3
-        with:
-          role-to-assume: ${{ inputs.aws-role-to-assume }}
-          role-session-name: gha-linux-test
-          aws-region: us-east-1
-
      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
        with:
          docker-image-name: ${{ inputs.docker-image }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -119,7 +97,6 @@ jobs:
        uses: ./.github/actions/download-build-artifacts
        with:
          name: ${{ inputs.build-environment }}
-          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Generate netrc (only for docs-push)
        if: inputs.push
@ -179,14 +156,6 @@ jobs:
        uses: ./.github/actions/chown-workspace
        if: always()

-      - name: configure aws credentials
-        if : ${{ inputs.upload-aws-role-to-assume != '' }}
-        uses: aws-actions/configure-aws-credentials@v3
-        with:
-          role-to-assume: ${{ inputs.upload-aws-role-to-assume }}
-          role-session-name: gha-linux-test
-          aws-region: us-east-1
-
      - name: Upload Python Docs Preview
        uses: seemethere/upload-artifact-s3@v5
        if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'python' && steps.build-docs.outcome == 'success' }}
@ -194,7 +163,7 @@ jobs:
          retention-days: 14
          s3-bucket: doc-previews
          if-no-files-found: error
-          path: pytorch_docs/main/
+          path: pytorch.github.io/docs/main/
          s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}

      - name: Upload C++ Docs Preview
@ -218,5 +187,5 @@ jobs:
          s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
        if: always()
--- a/.github/workflows/_ios-build-test.yml
+++ b/.github/workflows/_ios-build-test.yml
@ -46,7 +46,7 @@ jobs:
      keep-going: ${{ steps.filter.outputs.keep-going }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          fetch-depth: 1
          submodules: false
@ -80,7 +80,7 @@ jobs:
    steps:
      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Populate CI build options
        shell: bash
@ -102,7 +102,7 @@ jobs:
            brew install libtool

      - name: Setup miniconda for iOS
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.3
        with:
          python-version: "3.9"
          environment-file: .github/requirements/conda-env-iOS.txt
--- a/.github/workflows/_linux-build-label.yml
+++ b/.github/workflows/_linux-build-label.yml
@ -1,109 +0,0 @@
-name: linux-build
-
-on:
-  workflow_call:
-    inputs:
-      build-environment:
-        required: true
-        type: string
-        description: Top-level label for what's being built/tested.
-      docker-image-name:
-        required: true
-        type: string
-        description: Name of the base docker image to build with.
-      build-generates-artifacts:
-        required: false
-        type: boolean
-        default: true
-        description: If set, upload generated build artifacts.
-      build-with-debug:
-        required: false
-        type: boolean
-        default: false
-        description: If set, build in debug mode.
-      sync-tag:
-        required: false
-        type: string
-        default: ""
-        description: |
-          If this is set, our linter will use this to make sure that every other
-          job with the same `sync-tag` is identical.
-      cuda-arch-list:
-        required: false
-        type: string
-        default: "5.2"
-        description: Runner label to select worker type
-      runner:
-        required: false
-        type: string
-        default: "linux.2xlarge"
-        description: |
-          List of CUDA architectures CI build should target.
-      test-matrix:
-        required: false
-        type: string
-        description: |
-          An option JSON description of what test configs to run later on. This
-          is moved here from the Linux test workflow so that we can apply filter
-          logic using test-config labels earlier and skip unnecessary builds
-      s3-bucket:
-        description: S3 bucket to download artifact
-        required: false
-        type: string
-        default: "gha-artifacts"
-      aws-role-to-assume:
-        description: role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
-    secrets:
-      HUGGING_FACE_HUB_TOKEN:
-        required: false
-        description: |
-          HF Auth token to avoid rate limits when downloading models or datasets from hub
-
-    outputs:
-      docker-image:
-        value: ${{ jobs.build.outputs.docker-image }}
-        description: The docker image containing the built PyTorch.
-      test-matrix:
-        value: ${{ jobs.build.outputs.test-matrix }}
-        description: An optional JSON description of what test configs to run later on.
-
-jobs:
-  build:
-    # Don't run on forked repos
-    if: github.repository_owner == 'pytorch'
-    runs-on: ${{ inputs.runner }}
-    timeout-minutes: 240
-    outputs:
-      docker-image: ${{ steps.linux-build.outputs.docker-image }}
-      test-matrix: ${{ steps.linux-build.outputs.test-matrix }}
-    steps:
-      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
-        with:
-          github-secret: ${{ secrets.GITHUB_TOKEN }}
-
-      # [pytorch repo ref]
-      # Use a pytorch/pytorch reference instead of a reference to the local
-      # checkout because when we run this action we don't *have* a local
-      # checkout. In other cases you should prefer a local checkout.
-      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
-
-      - name: Linux Build
-        id: linux-build
-        uses: ./.github/actions/linux-build
-        with:
-          build-environment: ${{ inputs.build-environment }}
-          docker-image-name: ${{ inputs.docker-image-name }}
-          build-generates-artifacts: ${{ inputs.build-generates-artifacts }}
-          build-with-debug: ${{ inputs.build-with-debug }}
-          sync-tag: ${{ inputs.sync-tag }}
-          cuda-arch-list: ${{ inputs.cuda-arch-list }}
-          test-matrix: ${{ inputs.test-matrix }}
-          s3-bucket: ${{ inputs.s3-bucket }}
-          aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
--- a/.github/workflows/_linux-build-rg.yml
+++ b/.github/workflows/_linux-build-rg.yml
@ -1,105 +0,0 @@
-name: linux-build-rg
-
-on:
-  workflow_call:
-    inputs:
-      build-environment:
-        required: true
-        type: string
-        description: Top-level label for what's being built/tested.
-      docker-image-name:
-        required: true
-        type: string
-        description: Name of the base docker image to build with.
-      build-generates-artifacts:
-        required: false
-        type: boolean
-        default: true
-        description: If set, upload generated build artifacts.
-      build-with-debug:
-        required: false
-        type: boolean
-        default: false
-        description: If set, build in debug mode.
-      sync-tag:
-        required: false
-        type: string
-        default: ""
-        description: |
-          If this is set, our linter will use this to make sure that every other
-          job with the same `sync-tag` is identical.
-      cuda-arch-list:
-        required: false
-        type: string
-        default: "5.2"
-        description: |
-          List of CUDA architectures CI build should target.
-      runner-group:
-        required: false
-        type: string
-        default: "arc-lf-linux.2xlarge"
-        description: Runner group to select group type
-      test-matrix:
-        required: false
-        type: string
-        description: |
-          An option JSON description of what test configs to run later on. This
-          is moved here from the Linux test workflow so that we can apply filter
-          logic using test-config labels earlier and skip unnecessary builds
-      s3-bucket:
-        description: S3 bucket to download artifact
-        required: false
-        type: string
-        default: "gha-artifacts"
-      aws-role-to-assume:
-        description: role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
-    secrets:
-      HUGGING_FACE_HUB_TOKEN:
-        required: false
-        description: |
-          HF Auth token to avoid rate limits when downloading models or datasets from hub
-
-    outputs:
-      docker-image:
-        value: ${{ jobs.build.outputs.docker-image }}
-        description: The docker image containing the built PyTorch.
-      test-matrix:
-        value: ${{ jobs.build.outputs.test-matrix }}
-        description: An optional JSON description of what test configs to run later on.
-
-jobs:
-  build:
-    # Don't run on forked repos
-    if: github.repository_owner == 'pytorch'
-    runs-on:
-      group: ${{ inputs.runner-group }}
-    timeout-minutes: 240
-    outputs:
-      docker-image: ${{ steps.linux-build.outputs.docker-image }}
-      test-matrix: ${{ steps.linux-build.outputs.test-matrix }}
-    steps:
-      # [pytorch repo ref]
-      # Use a pytorch/pytorch reference instead of a reference to the local
-      # checkout because when we run this action we don't *have* a local
-      # checkout. In other cases you should prefer a local checkout.
-      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
-
-      - name: Linux Build
-        id: linux-build
-        uses: ./.github/actions/linux-build
-        with:
-          build-environment: ${{ inputs.build-environment }}
-          docker-image-name: ${{ inputs.docker-image-name }}
-          build-generates-artifacts: ${{ inputs.build-generates-artifacts }}
-          build-with-debug: ${{ inputs.build-with-debug }}
-          sync-tag: ${{ inputs.sync-tag }}
-          cuda-arch-list: ${{ inputs.cuda-arch-list }}
-          test-matrix: ${{ inputs.test-matrix }}
-          s3-bucket: ${{ inputs.s3-bucket }}
-          aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@ -47,23 +47,6 @@ on:
          An option JSON description of what test configs to run later on. This
          is moved here from the Linux test workflow so that we can apply filter
          logic using test-config labels earlier and skip unnecessary builds
-      selected-test-configs:
-        description: |
-          A comma-separated list of test configurations from the test matrix to keep,
-          The empty list means we are going to keep every configurations by defaults
-        required: false
-        type: string
-        default: ""
-      s3-bucket:
-        description: S3 bucket to download artifact
-        required: false
-        type: string
-        default: "gha-artifacts"
-      aws-role-to-assume:
-        description: Role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
    secrets:
      HUGGING_FACE_HUB_TOKEN:
        required: false
@ -90,7 +73,7 @@ jobs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

@ -99,22 +82,14 @@ jobs:
      # checkout because when we run this action we don't *have* a local
      # checkout. In other cases you should prefer a local checkout.
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

-      - name: configure aws credentials
-        uses: aws-actions/configure-aws-credentials@v3
-        if: ${{ inputs.aws-role-to-assume != '' }}
-        with:
-          role-to-assume: ${{ inputs.aws-role-to-assume }}
-          role-session-name: gha-linux-build
-          aws-region: us-east-1
-
      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

@ -128,7 +103,7 @@ jobs:
          echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -150,7 +125,6 @@ jobs:
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          test-matrix: ${{ inputs.test-matrix }}
-          selected-test-configs: ${{ inputs.selected-test-configs }}
          job-name: ${{ steps.get-job-id.outputs.job-name }}

      - name: Download pytest cache
@ -159,7 +133,6 @@ jobs:
        with:
          cache_dir: .pytest_cache
          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
-          s3_bucket: ${{ inputs.s3-bucket }}

      - name: Build
        if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
@ -224,7 +197,6 @@ jobs:
          retention-days: 14
          if-no-files-found: error
          path: artifacts.zip
-          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Upload sccache stats
        if: steps.build.outcome != 'skipped'
@ -235,8 +207,7 @@ jobs:
          retention-days: 365
          if-no-files-found: warn
          path: sccache-stats-*.json
-          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
        if: always()
--- a/.github/workflows/_linux-test-label.yml
+++ b/.github/workflows/_linux-test-label.yml
@ -1,85 +0,0 @@
-name: linux-test-rg
-
-on:
-  workflow_call:
-    inputs:
-      build-environment:
-        required: true
-        type: string
-        description: Top-level label for what's being built/tested.
-      test-matrix:
-        required: true
-        type: string
-        description: JSON description of what test configs to run.
-      docker-image:
-        required: true
-        type: string
-        description: Docker image to run in.
-      sync-tag:
-        required: false
-        type: string
-        default: ""
-        description: |
-          If this is set, our linter will use this to make sure that every other
-          job with the same `sync-tag` is identical.
-      timeout-minutes:
-        required: false
-        type: number
-        default: 240
-        description: |
-          Set the maximum (in minutes) how long the workflow should take to finish
-      use-gha:
-        required: false
-        type: string
-        default: ""
-        description: If set to any value, upload to GHA. Otherwise upload to S3.
-      dashboard-tag:
-        required: false
-        type: string
-        default: ""
-      s3-bucket:
-        description: S3 bucket to download artifact
-        required: false
-        type: string
-        default: "gha-artifacts"
-      aws-role-to-assume:
-        description: role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
-    secrets:
-      HUGGING_FACE_HUB_TOKEN:
-        required: false
-        description: |
-          HF Auth token to avoid rate limits when downloading models or datasets from hub
-
-env:
-  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-
-jobs:
-  test:
-    # Don't run on forked repos or empty test matrix
-    if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
-    strategy:
-      matrix: ${{ fromJSON(inputs.test-matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
-    steps:
-      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
-
-      - name: Linux Test
-        id: linux-test
-        uses: ./.github/actions/linux-test
-        with:
-          build-environment: ${{ inputs.build-environment }}
-          test-matrix: ${{ inputs.test-matrix }}
-          docker-image: ${{ inputs.docker-image }}
-          sync-tag: ${{ inputs.sync-tag }}
-          use-gha: ${{ inputs.use-gha }}
-          dashboard-tag: ${{ inputs.dashboard-tag }}
-          s3-bucket: ${{ inputs.s3-bucket }}
-          aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
-          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/_linux-test-rg.yml
+++ b/.github/workflows/_linux-test-rg.yml
@ -1,86 +0,0 @@
-name: linux-test-label
-
-on:
-  workflow_call:
-    inputs:
-      build-environment:
-        required: true
-        type: string
-        description: Top-level label for what's being built/tested.
-      test-matrix:
-        required: true
-        type: string
-        description: JSON description of what test configs to run.
-      docker-image:
-        required: true
-        type: string
-        description: Docker image to run in.
-      sync-tag:
-        required: false
-        type: string
-        default: ""
-        description: |
-          If this is set, our linter will use this to make sure that every other
-          job with the same `sync-tag` is identical.
-      timeout-minutes:
-        required: false
-        type: number
-        default: 240
-        description: |
-          Set the maximum (in minutes) how long the workflow should take to finish
-      use-gha:
-        required: false
-        type: string
-        default: ""
-        description: If set to any value, upload to GHA. Otherwise upload to S3.
-      dashboard-tag:
-        required: false
-        type: string
-        default: ""
-      s3-bucket:
-        description: S3 bucket to download artifact
-        required: false
-        type: string
-        default: "gha-artifacts"
-      aws-role-to-assume:
-        description: role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
-    secrets:
-      HUGGING_FACE_HUB_TOKEN:
-        required: false
-        description: |
-          HF Auth token to avoid rate limits when downloading models or datasets from hub
-
-env:
-  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-
-jobs:
-  test:
-    # Don't run on forked repos or empty test matrix
-    if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
-    strategy:
-      matrix: ${{ fromJSON(inputs.test-matrix) }}
-      fail-fast: false
-    runs-on:
-      group: ${{ matrix.runner }}
-    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
-    steps:
-      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
-
-      - name: Linux Test
-        id: linux-test
-        uses: ./.github/actions/linux-test
-        with:
-          build-environment: ${{ inputs.build-environment }}
-          test-matrix: ${{ inputs.test-matrix }}
-          docker-image: ${{ inputs.docker-image }}
-          sync-tag: ${{ inputs.sync-tag }}
-          use-gha: ${{ inputs.use-gha }}
-          dashboard-tag: ${{ inputs.dashboard-tag }}
-          s3-bucket: ${{ inputs.s3-bucket }}
-          aws-role-to-assume: ${{ inputs.aws-role-to-assume }}
-          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/_linux-test.yml
+++ b/.github/workflows/_linux-test.yml
@ -37,16 +37,6 @@ on:
        required: false
        type: string
        default: ""
-      s3-bucket:
-        description: S3 bucket to download artifact
-        required: false
-        type: string
-        default: "gha-artifacts"
-      aws-role-to-assume:
-        description: role to assume for downloading artifacts
-        required: false
-        type: string
-        default: ""
    secrets:
      HUGGING_FACE_HUB_TOKEN:
        required: false
@ -67,7 +57,7 @@ jobs:
    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.3
        if: ${{ !contains(matrix.runner, 'gcp.a100') }}
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -76,22 +66,14 @@ jobs:
              docker exec -it $(docker container ps --format '{{.ID}}') bash

      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

-      - name: configure aws credentials
-        if : ${{ inputs.aws-role-to-assume != '' }}
-        uses: aws-actions/configure-aws-credentials@v3
-        with:
-          role-to-assume: ${{ inputs.aws-role-to-assume }}
-          role-session-name: gha-linux-test
-          aws-region: us-east-1
-
      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
        with:
          docker-image-name: ${{ inputs.docker-image }}

@ -105,19 +87,14 @@ jobs:
          echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

-      - name: Check if in a ARC runner
-        shell: bash
-        id: check_arc_runner
-        run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
-
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        id: install-nvidia-driver
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
-        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.3
+        if: contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu')

      - name: Lock NVIDIA A100 40GB Frequency
        run: |
@ -139,7 +116,6 @@ jobs:
        uses: ./.github/actions/download-build-artifacts
        with:
          name: ${{ inputs.build-environment }}
-          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Download TD artifacts
        continue-on-error: true
@ -200,7 +176,6 @@ jobs:
          VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
          NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
          NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
-          TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
          SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
@ -253,7 +228,6 @@ jobs:
            -e VERBOSE_TEST_LOGS \
            -e NO_TEST_TIMEOUT \
            -e NO_TD \
-            -e TD_DISTRIBUTED \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
@ -266,6 +240,7 @@ jobs:
            -e HUGGING_FACE_HUB_TOKEN \
            -e DASHBOARD_TAG \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
@ -315,7 +290,6 @@ jobs:
        with:
          file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
          use-gha: ${{ inputs.use-gha }}
-          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Collect backtraces from coredumps (if any)
        if: always()
@ -333,7 +307,7 @@ jobs:
          path: ./**/core.[1-9]*

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.3
        if: always()

      # NB: We are currently having an intermittent GPU-related issue on G5 runners with
--- a/.github/workflows/_mac-build.yml
+++ b/.github/workflows/_mac-build.yml
@ -71,11 +71,11 @@ jobs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
    steps:
      - name: Clean up disk space before running MacOS workflow
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.3

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Set xcode version
        env:
@ -87,7 +87,7 @@ jobs:

      - name: Setup miniconda
        if: inputs.environment-file == ''
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.3
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -97,7 +97,7 @@ jobs:
      # environment even though the arch is x86-64
      - name: Setup miniconda using the provided environment file
        if: inputs.environment-file != ''
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.3
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: ${{ inputs.environment-file }}
@ -207,4 +207,4 @@ jobs:
      - name: Clean up disk space
        if: always()
        continue-on-error: true
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.3
--- a/.github/workflows/_mac-test-mps.yml
+++ b/.github/workflows/_mac-test-mps.yml
@ -40,7 +40,7 @@ jobs:
      reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          submodules: false

@ -81,7 +81,7 @@ jobs:
          use-gha: true

      - name: Setup miniconda
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.3
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -159,4 +159,4 @@ jobs:
      - name: Clean up disk space
        if: always()
        continue-on-error: true
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.3
--- a/.github/workflows/_mac-test.yml
+++ b/.github/workflows/_mac-test.yml
@ -24,6 +24,11 @@ on:
        default: "3.8"
        description: |
          The python version to be used. Will be 3.8 by default
+      arch:
+        required: true
+        type: string
+        description: |
+          Contains the architecture to run the tests with
      timeout-minutes:
        required: false
        type: number
@ -39,7 +44,7 @@ jobs:
    # Also ensure that we always run with the right architecture
    defaults:
      run:
-        shell: bash -e -l {0}
+        shell: arch -arch ${{ inputs.arch }} bash -e -l {0}
    strategy:
      matrix: ${{ fromJSON(inputs.test-matrix) }}
      fail-fast: false
@ -74,11 +79,11 @@ jobs:
          done

      - name: Clean up disk space before running MacOS workflow
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.3

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3

      - name: Download build artifacts
        uses: ./.github/actions/download-build-artifacts
@ -93,7 +98,7 @@ jobs:
          use-gha: true

      - name: Setup miniconda
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.3
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -128,6 +133,12 @@ jobs:
          test-matrix: ${{ inputs.test-matrix }}
          job-name: ${{ steps.get-job-id.outputs.job-name }}

+      - name: Pre-process arm64 wheels
+        if: inputs.build-environment == 'macos-12-py3-arm64'
+        run: |
+          # As wheels are cross-compiled they are reported as x86_64 ones
+          ORIG_WHLNAME=$(ls -1 dist/*.whl); ARM_WHLNAME=${ORIG_WHLNAME/x86_64/arm64}; mv "${ORIG_WHLNAME}" "${ARM_WHLNAME}"
+
      - name: Set Test step time
        id: test-timeout
        shell: bash
@ -216,4 +227,4 @@ jobs:
      - name: Clean up disk space
        if: always()
        continue-on-error: true
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.3
--- a/.github/workflows/_rocm-test.yml
+++ b/.github/workflows/_rocm-test.yml
@ -58,7 +58,7 @@ jobs:
    steps:
      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.3
        with:
          no-sudo: true

@ -80,12 +80,12 @@ jobs:

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.3
        with:
          docker-image-name: ${{ inputs.docker-image }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.3
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Andrey Talman	37257774c6	Triton wheel build using 2.3.x branch (#122403 ) * Triton build 2.3.x * Revert "[Release Only] Build triton using pinned version rather branch (#121765)" This reverts commit d69c4219127e2cf5d9637b0daacc0a24e65f8133. * Triton wheel change * release	2024-03-21 12:52:21 -04:00
shunting314	c4e5434423	necessary change to make torch2.3 work with triton2.2 (#122139 )	2024-03-21 08:24:53 -04:00
pytorchbot	b4f90aae1b	CI: Specify libc and libstdcxx versions in conda environments (#121929 ) Without this we get mismatches between the GLIBC and GLIBCXX ABI used by conda packages vs pytorch. Pull Request resolved: https://github.com/pytorch/pytorch/pull/121556 Approved by: https://github.com/isuruf, https://github.com/malfet (cherry picked from commit 7a53dedb07ed72b85d1e083ce38c43c7810fc5f1) Co-authored-by: Peter Bell <peterbell10@live.co.uk>	2024-03-14 17:56:46 -04:00
Andrey Talman	94d6463255	[RELEASE ONLY CHANGES] Increase timeout for linux binary jobs, fix workflow lint (#121851 ) * [release only] Increase timeout job for linux binary builds by 30min * fix lint	2024-03-13 19:50:57 -04:00
Andrey Talman	6a89a753b1	[RELEASE ONLY CHANGES] Apply release only changes Release 2.3 (#121813 ) * [Release only changes] Release only changes #2 * common+lint	2024-03-13 11:03:48 -04:00
Andrey Talman	d69c421912	[Release Only] Build triton using pinned version rather branch (#121765 )	2024-03-12 19:05:23 -04:00
Andrey Talman	6725db07ae	[RELEASE ONLY CHANGES] Apply release only changes Release 2.3 (#121726 ) * Apply release only changes * temp changes * tweak * fix * Revert "tweak" This reverts commit 38edcac21448829ac114c73423c84614628e2598.	2024-03-12 18:14:35 -04:00
 @ -1 +1 @@
 .0.0
 .3.0