change the test wheel to release wheel when release wheel available (#145884 )

change the test wheel to release wheel when release wheel available (#145252) change the test wheel to release wheel when release wheel available Pull Request resolved: https://github.com/pytorch/pytorch/pull/145252 Approved by: https://github.com/seemethere, https://github.com/atalman Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com> (cherry picked from commit 9003d81144fcda2d96814cf9126dbe2b9deb7de7) Co-authored-by: Zheng, Zhaoqiong <zhaoqiong.zheng@intel.com>
[CUDA] Change slim-wheel libraries load order (#145662 )
2025-10-25 08:11:06 +08:00 · 2025-01-28 16:09:34 -08:00 · 2025-01-24 14:54:25 -08:00 · 2025-01-24 08:40:13 -08:00 · 2025-01-24 08:39:52 -08:00 · 2025-01-24 09:16:57 -05:00
4055 changed files with 30621 additions and 310368 deletions
--- a/.ci/aarch64_linux/aarch64_ci_build.sh
+++ b/.ci/aarch64_linux/aarch64_ci_build.sh
@ -3,9 +3,6 @@ set -eux -o pipefail

 GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}

-# cuda arm build for Grace Hopper solely
-export TORCH_CUDA_ARCH_LIST="9.0"
-
 SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
 source $SCRIPTPATH/aarch64_ci_setup.sh

--- a/.ci/aarch64_linux/aarch64_ci_setup.sh
+++ b/.ci/aarch64_linux/aarch64_ci_setup.sh
@ -5,16 +5,14 @@ set -eux -o pipefail
 # By creating symlinks from desired /opt/python to /usr/local/bin/

 NUMPY_VERSION=2.0.2
-PYGIT2_VERSION=1.15.1
-if [[ "$DESIRED_PYTHON"  == "3.13" ]]; then
+if [[ "$DESIRED_PYTHON"  == "3.13" || "$DESIRED_PYTHON" == "3.13t" ]]; then
    NUMPY_VERSION=2.1.2
-    PYGIT2_VERSION=1.16.0
 fi

 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 source $SCRIPTPATH/../manywheel/set_desired_python.sh

-pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2 pygit2==${PYGIT2_VERSION}
+pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2

 for tool in python python3 pip pip3 ninja scons patchelf; do
    ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin;
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@ -6,8 +6,6 @@ import shutil
 from subprocess import check_call, check_output
 from typing import List

-from pygit2 import Repository
-

 def list_dir(path: str) -> List[str]:
    """'
@ -171,10 +169,9 @@ if __name__ == "__main__":
    args = parse_arguments()
    enable_mkldnn = args.enable_mkldnn
    enable_cuda = args.enable_cuda
-    repo = Repository("/pytorch")
-    branch = repo.head.name
-    if branch == "HEAD":
-        branch = "master"
+    branch = check_output(
+        ["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd="/pytorch"
+    ).decode().strip()  # drop git's trailing newline so `branch in [...]` below can match

    print("Building PyTorch wheel")
    build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
@ -186,7 +183,7 @@ if __name__ == "__main__":
        build_vars += (
            f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
        )
-    elif branch in ["nightly", "master"]:
+    elif branch in ["nightly", "main"]:
        build_date = (
            check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
            .decode()
--- a/.ci/aarch64_linux/build_aarch64_wheel.py
+++ b/.ci/aarch64_linux/build_aarch64_wheel.py
@ -619,11 +619,9 @@ def build_torchaudio(
    if host.using_docker():
        build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"

-    host.run_cmd(
-        f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
+    host.run_cmd(f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
        && ./packaging/ffmpeg/build.sh \
-        && {build_vars} python3 setup.py bdist_wheel"
-    )
+        && {build_vars} python3 setup.py bdist_wheel")

    wheel_name = host.list_dir("audio/dist")[0]
    embed_libgomp(host, use_conda, os.path.join("audio", "dist", wheel_name))
--- a/.ci/docker/aotriton_version.txt
+++ b/.ci/docker/aotriton_version.txt
@ -0,0 +1,5 @@
+0.8b
+manylinux_2_28
+rocm6.2
+6f8cbcac8a92775291bb1ba8f514d4beb350baf4
+e938def5d32869fe2e00aec0300f354c9f157867bebdf2e104d732b94cb238d8
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -208,6 +208,20 @@ case "$image" in
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
+  pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
+    CUDA_VERSION=12.4.1
+    CUDNN_VERSION=9
+    ANACONDA_PYTHON_VERSION=3.10
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    CONDA_CMAKE=yes
+    TRITON=yes
+    ;;
  pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
    CUDA_VERSION=12.1.1
    CUDNN_VERSION=9
@ -222,6 +236,20 @@ case "$image" in
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
+  pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
+    CUDA_VERSION=12.4.1
+    CUDNN_VERSION=9
+    ANACONDA_PYTHON_VERSION=3.10
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    UCX_COMMIT=${_UCX_COMMIT}
+    UCC_COMMIT=${_UCC_COMMIT}
+    CONDA_CMAKE=yes
+    TRITON=yes
+    ;;
  pytorch-linux-focal-py3-clang10-onnx)
    ANACONDA_PYTHON_VERSION=3.9
    CLANG_VERSION=10
@ -268,7 +296,7 @@ case "$image" in
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    ROCM_VERSION=6.2.4
+    ROCM_VERSION=6.1
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
@ -279,7 +307,7 @@ case "$image" in
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    ROCM_VERSION=6.3
+    ROCM_VERSION=6.2.4
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
@ -497,7 +525,7 @@ docker build \
       --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
       --build-arg "KATEX=${KATEX:-}" \
       --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
-       --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
+       --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a}" \
       --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
       --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
       --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
--- a/.ci/docker/centos-rocm/Dockerfile
+++ b/.ci/docker/centos-rocm/Dockerfile
@ -113,6 +113,13 @@ COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
 RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

+# Install AOTriton (Early fail)
+COPY ./aotriton_version.txt aotriton_version.txt
+COPY ./common/common_utils.sh common_utils.sh
+COPY ./common/install_aotriton.sh install_aotriton.sh
+RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
+ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
+
 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
--- a/.ci/docker/ci_commit_pins/executorch.txt
+++ b/.ci/docker/ci_commit_pins/executorch.txt
@ -1 +1 @@
-a29b208a06ab378bb29ab1aa68932e412f8e09f1
+6f638937d64e3396793956d75ee3e14802022745
--- a/.ci/docker/ci_commit_pins/triton.txt
+++ b/.ci/docker/ci_commit_pins/triton.txt
@ -1 +1 @@
-0d4682f073ded4d1a8260dd4208a43d735ae3a2b
+35c6c7c6284582b3f41c71c150e11b517acf074a
--- a/.ci/docker/common/install_acl.sh
+++ b/.ci/docker/common/install_acl.sh
@ -1,7 +1,7 @@
 set -euo pipefail

 readonly version=v24.04
-readonly src_host=https://github.com/ARM-software
+readonly src_host=https://review.mlplatform.org/ml
 readonly src_repo=ComputeLibrary

 # Clone ACL
--- a/.ci/docker/common/install_aotriton.sh
+++ b/.ci/docker/common/install_aotriton.sh
@ -0,0 +1,23 @@
+#!/bin/bash
+# Download the AOTriton release described by aotriton_version.txt, verify its SHA256, and unpack it under the prefix given as $1.
+set -ex
+
+source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
+
+TARBALL='aotriton.tar.gz'
+# This read command always returns with exit code 1, hence the "|| true" to survive "set -e"
+read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true
+ARCH=$(uname -m)
+AOTRITON_INSTALL_PREFIX="$1"
+AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.gz"
+
+cd "${AOTRITON_INSTALL_PREFIX}"
+# Must use -L to follow redirects
+curl -L --retry 3 -o "${TARBALL}" "${AOTRITON_URL}"
+ACTUAL_SHA256=$(sha256sum "${TARBALL}" | cut -d " " -f 1)
+if [ "${SHA256}" != "${ACTUAL_SHA256}" ]; then
+  echo -n "Error: The SHA256 of downloaded tarball is ${ACTUAL_SHA256},"
+  echo " which does not match the expected value ${SHA256}."
+  exit 1  # a bare "exit" would return the status of the echo above (0) and hide the mismatch
+fi
+tar xf "${TARBALL}" && rm -rf "${TARBALL}"
--- a/.ci/docker/common/install_cache.sh
+++ b/.ci/docker/common/install_cache.sh
@ -9,7 +9,7 @@ install_ubuntu() {
  # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
  apt-get install -y cargo
  echo "Checking out sccache repo"
-  git clone https://github.com/mozilla/sccache -b v0.9.0
+  git clone https://github.com/mozilla/sccache -b v0.8.2
  cd sccache
  echo "Building sccache"
  cargo build --release
@ -36,7 +36,11 @@ sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
 export PATH="/opt/cache/bin:$PATH"

 # Setup compiler cache
-install_ubuntu
+if [ -n "$ROCM_VERSION" ]; then
+  curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
+else
+  install_ubuntu
+fi
 chmod a+x /opt/cache/bin/sccache

 function write_sccache_stub() {
--- a/.ci/docker/common/install_cpython.sh
+++ b/.ci/docker/common/install_cpython.sh
@ -70,7 +70,7 @@ function do_cpython_build {
    # install setuptools since python 3.12 is required to use distutils
    ${prefix}/bin/pip install wheel==0.34.2 setuptools==68.2.2
    local abi_tag=$(${prefix}/bin/python -c "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag; print('{0}{1}-{2}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))")
-    ln -sf ${prefix} /opt/python/${abi_tag}
+    ln -s ${prefix} /opt/python/${abi_tag}
 }

 function build_cpython {
--- a/.ci/docker/common/install_rocm.sh
+++ b/.ci/docker/common/install_rocm.sh
@ -62,22 +62,6 @@ install_ubuntu() {
        sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
    done

-    # ROCm 6.3 had a regression where initializing static code objects had significant overhead
-    if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
-        # clr build needs CppHeaderParser but can only find it using conda's python
-        /opt/conda/bin/python -m pip install CppHeaderParser
-        git clone https://github.com/ROCm/HIP -b rocm-6.3.x
-        HIP_COMMON_DIR=$(readlink -f HIP)
-        git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix
-        mkdir -p clr/build
-        pushd clr/build
-        cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
-        make -j
-        cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.*
-        popd
-        rm -rf HIP clr
-    fi
-
    # Cleanup
    apt-get autoclean && apt-get clean
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
--- a/.ci/docker/libtorch/Dockerfile
+++ b/.ci/docker/libtorch/Dockerfile
@ -92,6 +92,13 @@ RUN apt-get update -y && \
 RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
 RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh

+# Install AOTriton
+COPY ./common/common_utils.sh common_utils.sh
+COPY ./aotriton_version.txt aotriton_version.txt
+COPY ./common/install_aotriton.sh install_aotriton.sh
+RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
+ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
+
 FROM ${BASE_TARGET} as final
 COPY --from=openssl            /opt/openssl           /opt/openssl
 # Install patchelf
--- a/.ci/docker/manywheel/Dockerfile
+++ b/.ci/docker/manywheel/Dockerfile
@ -198,3 +198,10 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
 ADD ./common/install_miopen.sh install_miopen.sh
 RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
+
+# Install AOTriton
+COPY ./common/common_utils.sh common_utils.sh
+COPY ./aotriton_version.txt aotriton_version.txt
+COPY ./common/install_aotriton.sh install_aotriton.sh
+RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
+ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@ -30,10 +30,10 @@ dill==0.3.7
 #Pinned versions: 0.3.7
 #test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py

-expecttest==0.3.0
+expecttest==0.2.1
 #Description: method for writing tests where test framework auto populates
 # the expected output based on previous runs
-#Pinned versions: 0.3.0
+#Pinned versions: 0.2.1
 #test that import:

 fbscribelogger==0.1.7
@ -280,9 +280,9 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
 #test that import:

 #lintrunner is supported on aarch64-linux only from 0.12.4 version
-lintrunner==0.12.7
+lintrunner==0.12.5
 #Description: all about linters!
-#Pinned versions: 0.12.7
+#Pinned versions: 0.12.5
 #test that import:

 redis>=4.0.0
@ -294,7 +294,7 @@ ghstack==0.8.0
 #Pinned versions: 0.8.0
 #test that import:

-jinja2==3.1.5
+jinja2==3.1.4
 #Description: jinja2 template engine
 #Pinned versions: 3.1.4
 #test that import:
@ -304,7 +304,7 @@ pytest-cpp==2.3.0
 #Pinned versions: 2.3.0
 #test that import:

-z3-solver==4.12.6.0
+z3-solver==4.12.2.0
 #Description: The Z3 Theorem Prover Project
 #Pinned versions:
 #test that import:
--- a/.ci/docker/ubuntu-rocm/Dockerfile
+++ b/.ci/docker/ubuntu-rocm/Dockerfile
@ -107,11 +107,12 @@ COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
 RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

-# This is needed by sccache
-COPY ./common/install_openssl.sh install_openssl.sh
-ENV OPENSSL_ROOT_DIR /opt/openssl
-RUN bash ./install_openssl.sh
-ENV OPENSSL_DIR /opt/openssl
+# Install AOTriton
+COPY ./aotriton_version.txt aotriton_version.txt
+COPY ./common/common_utils.sh common_utils.sh
+COPY ./common/install_aotriton.sh install_aotriton.sh
+RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
+ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton

 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
--- a/.ci/manywheel/build_cuda.sh
+++ b/.ci/manywheel/build_cuda.sh
@ -43,6 +43,13 @@ if [[ -n "$DESIRED_CUDA" ]]; then
        fi
    fi
    echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
+
+    # There really has to be a better way to do this - eli
+    # Possibly limiting builds to specific cuda versions by delimiting images would be a choice
+    if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
+        echo "Switching to CUDA version ${DESIRED_CUDA}"
+        /builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
+    fi
 else
    CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
    echo "CUDA $CUDA_VERSION Detected"
@ -53,10 +60,22 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
 TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
 case ${CUDA_VERSION} in
    12.6)
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
+        if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
+            TORCH_CUDA_ARCH_LIST="9.0"
+        else
+            TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
+        fi
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        ;;
    12.4)
+        if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
+            TORCH_CUDA_ARCH_LIST="9.0"
+        else
+            TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
+        fi
+        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
+        ;;
+    12.1)
        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        ;;
@ -256,7 +275,7 @@ else
    exit 1
 fi

-# run_tests.sh requires DESIRED_CUDA to know what tests to exclude
+# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
 export DESIRED_CUDA="$cuda_version_nodot"

 # Switch `/usr/local/cuda` to the desired CUDA version
--- a/.ci/manywheel/build_rocm.sh
+++ b/.ci/manywheel/build_rocm.sh
@ -118,7 +118,7 @@ if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then
    fi
    LIBDRM_PATH="/opt/amdgpu/lib64/libdrm.so.2"
    LIBDRM_AMDGPU_PATH="/opt/amdgpu/lib64/libdrm_amdgpu.so.1"
-    if [[ $ROCM_INT -ge 60100 && $ROCM_INT -lt 60300 ]]; then
+    if [[ $ROCM_INT -ge 60100 ]]; then
        # Below libs are direct dependencies of libhipsolver
        LIBSUITESPARSE_CONFIG_PATH="/lib64/libsuitesparseconfig.so.4"
        if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
@ -151,7 +151,7 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    fi
    LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2"
    LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1"
-    if [[ $ROCM_INT -ge 60100 && $ROCM_INT -lt 60300 ]]; then
+    if [[ $ROCM_INT -ge 60100 ]]; then
        # Below libs are direct dependencies of libhipsolver
        LIBCHOLMOD_PATH="/lib/x86_64-linux-gnu/libcholmod.so.3"
        # Below libs are direct dependencies of libcholmod
@ -186,6 +186,15 @@ do
    OS_SO_FILES[${#OS_SO_FILES[@]}]=$file_name # Append lib to array
 done

+# FIXME: Temporary until https://github.com/pytorch/pytorch/pull/137443 lands
+# Install AOTriton
+if [ -e ${PYTORCH_ROOT}/.ci/docker/aotriton_version.txt ]; then
+    cp -a ${PYTORCH_ROOT}/.ci/docker/aotriton_version.txt aotriton_version.txt
+    bash ${PYTORCH_ROOT}/.ci/docker/common/install_aotriton.sh ${ROCM_HOME} && rm aotriton_version.txt
+    export AOTRITON_INSTALLED_PREFIX=${ROCM_HOME}/aotriton
+    ROCM_SO_FILES+=("libaotriton_v2.so")
+fi
+
 # rocBLAS library files
 ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
 ROCBLAS_LIB_DST=lib/rocblas/library
@ -257,6 +266,20 @@ RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC))
 DEPS_AUX_SRCLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_SRC/})
 DEPS_AUX_DSTLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_DST/})

+# PyTorch 2.6+ (AOTriton 0.8b+)
+# AKS = "AOTriton Kernel Storage", a file format to store GPU kernels compactly
+if (( $(echo "${PYTORCH_VERSION} 2.6" | awk '{print ($1 >= $2)}') )); then
+    LIBAOTRITON_DIR=$(find "$ROCM_HOME/lib/" -name "libaotriton_v2.so" -printf '%h\n')
+    if [[ -z ${LIBAOTRITON_DIR} ]]; then
+        LIBAOTRITON_DIR=$(find "$ROCM_HOME/" -name "libaotriton_v2.so" -printf '%h\n')
+    fi
+    AKS_FILES=($(find "${LIBAOTRITON_DIR}/aotriton.images" -type f -name '*.aks?' -printf '%P\n'))
+    AKS_SRC="${LIBAOTRITON_DIR}/aotriton.images"
+    AKS_DST="lib/aotriton.images"
+    DEPS_AUX_SRCLIST+=(${AKS_FILES[@]/#/${AKS_SRC}/})
+    DEPS_AUX_DSTLIST+=(${AKS_FILES[@]/#/${AKS_DST}/})
+fi
+
 echo "PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH}"

 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@ -228,7 +228,7 @@ if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
  export CMAKE_BUILD_TYPE=RelWithAssert
 fi

-# Do not change workspace permissions for ROCm and s390x CI jobs
+# Do not change workspace permissions for ROCm CI jobs
 # as it can leave workspace with bad permissions for cancelled jobs
 if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
  # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
@ -247,7 +247,7 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v
 fi

 if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
-  set -e -o pipefail
+  set -e

  get_bazel

@ -278,7 +278,7 @@ else
          "$BUILD_ENVIRONMENT" != *xla* ]]; then
      if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
        # Install numpy-2.0.2 for builds which are backward compatible with 1.X
-        python -mpip install numpy==2.0.2
+        python -mpip install --pre numpy==2.0.2
      fi

      WERROR=1 python setup.py clean
--- a/.ci/pytorch/common.sh
+++ b/.ci/pytorch/common.sh
@ -3,7 +3,7 @@
 # Common setup for all Jenkins scripts
 # shellcheck source=./common_utils.sh
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-set -ex -o pipefail
+set -ex

 # Required environment variables:
 #   $BUILD_ENVIRONMENT (should be set by your Docker image)
--- a/.ci/pytorch/common_utils.sh
+++ b/.ci/pytorch/common_utils.sh
@ -160,7 +160,7 @@ function install_torchvision() {
 }

 function install_tlparse() {
-  pip_install --user "tlparse==0.3.30"
+  pip_install --user "tlparse==0.3.25"
  PATH="$(python -m site --user-base)/bin:$PATH"
 }

@ -192,7 +192,7 @@ function install_torchrec_and_fbgemm() {

 function clone_pytorch_xla() {
  if [[ ! -d ./xla ]]; then
-    git clone --recursive --quiet https://github.com/pytorch/xla.git
+    git clone --recursive -b r2.6 https://github.com/pytorch/xla.git
    pushd xla
    # pin the xla hash so that we don't get broken by changes to xla
    git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
--- a/.ci/pytorch/cpp_doc_push_script.sh
+++ b/.ci/pytorch/cpp_doc_push_script.sh
@ -40,7 +40,7 @@ echo "Building PyTorch C++ API docs..."
 rm -rf cppdocs
 git clone https://github.com/pytorch/cppdocs

-set -ex -o pipefail
+set -ex

 # Generate ATen files
 pushd "${pt_checkout}"
--- a/.ci/pytorch/functorch_doc_push_script.sh
+++ b/.ci/pytorch/functorch_doc_push_script.sh
@ -5,7 +5,7 @@ pt_checkout="/var/lib/jenkins/workspace"
 source "$pt_checkout/.ci/pytorch/common_utils.sh"
 echo "functorch_doc_push_script.sh: Invoked with $*"

-set -ex -o pipefail
+set -ex

 version=${DOCS_VERSION:-nightly}
 echo "version: $version"
--- a/.ci/pytorch/install_cache_xla.sh
+++ b/.ci/pytorch/install_cache_xla.sh
@ -6,7 +6,7 @@
 # return the same thing, ex checks for for rocm, CUDA, and changing the path
 # where sccache is installed, and not changing /etc/environment.

-set -ex -o pipefail
+set -ex

 install_binary() {
  echo "Downloading sccache binary from S3 repo"
--- a/.ci/pytorch/multigpu-test.sh
+++ b/.ci/pytorch/multigpu-test.sh
@ -8,62 +8,55 @@
 source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

 echo "Testing pytorch"
-# When adding more tests, please use HUD to see which shard is shorter
-if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
-    # FSDP tests
-    for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
-fi
+time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose

-if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then
-    time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose
+# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
+# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
+# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
+time python test/run_test.py --verbose -i distributed/test_c10d_common
+time python test/run_test.py --verbose -i distributed/test_c10d_gloo
+time python test/run_test.py --verbose -i distributed/test_c10d_nccl
+time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
+time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
+time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
+time python test/run_test.py --verbose -i distributed/test_store
+time python test/run_test.py --verbose -i distributed/test_symmetric_memory
+time python test/run_test.py --verbose -i distributed/test_pg_wrapper
+time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
+# FSDP tests
+for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
+# ShardedTensor tests
+time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
+time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
+time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
+time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
+time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard

-    # Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
-    # python tools/download_mnist.py --quiet -d test/cpp/api/mnist
-    # OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
-    time python test/run_test.py --verbose -i distributed/test_c10d_common
-    time python test/run_test.py --verbose -i distributed/test_c10d_gloo
-    time python test/run_test.py --verbose -i distributed/test_c10d_nccl
-    time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
-    time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
-    time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
-    time python test/run_test.py --verbose -i distributed/test_store
-    time python test/run_test.py --verbose -i distributed/test_symmetric_memory
-    time python test/run_test.py --verbose -i distributed/test_pg_wrapper
-    time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
+# functional collective tests
+time python test/run_test.py --verbose -i distributed/test_functional_api

-    # ShardedTensor tests
-    time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
-    time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
-    time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
-    time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
-    time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
-    time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
+# DTensor tests
+time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops
+time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compile

-    # functional collective tests
-    time python test/run_test.py --verbose -i distributed/test_functional_api
+# DeviceMesh test
+time python test/run_test.py --verbose -i distributed/test_device_mesh

-    # DTensor tests
-    time python test/run_test.py --verbose -i distributed/tensor/test_random_ops
-    time python test/run_test.py --verbose -i distributed/tensor/test_dtensor_compile
+# DTensor/TP tests
+time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
+time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state

-    # DeviceMesh test
-    time python test/run_test.py --verbose -i distributed/test_device_mesh
+# FSDP2 tests
+time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh

-    # DTensor/TP tests
-    time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
-    time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state
+# ND composability tests
+time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_2d_composability
+time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_pp_composability

-    # FSDP2 tests
-    time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh
-
-    # ND composability tests
-    time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_2d_composability
-    time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_pp_composability
-
-    # Other tests
-    time python test/run_test.py --verbose -i test_cuda_primary_ctx
-    time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
-    time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
-    time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
-fi
+# Other tests
+time python test/run_test.py --verbose -i test_cuda_primary_ctx
+time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
+time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
+time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
 assert_git_not_dirty
--- a/.ci/pytorch/python_doc_push_script.sh
+++ b/.ci/pytorch/python_doc_push_script.sh
@ -7,7 +7,7 @@ source "$pt_checkout/.ci/pytorch/common_utils.sh"

 echo "python_doc_push_script.sh: Invoked with $*"

-set -ex -o pipefail
+set -ex

 # for statements like ${1:-${DOCS_INSTALL_PATH:-docs/}}
 # the order of operations goes:
@ -63,7 +63,7 @@ build_docs () {
    echo "(tried to echo the WARNINGS above the ==== line)"
    echo =========================
  fi
-  set -ex -o pipefail
+  set -ex
  return $code
 }

--- a/.ci/pytorch/run_tests.sh
+++ b/.ci/pytorch/run_tests.sh
@ -13,7 +13,7 @@ set -eux -o pipefail

 # This script expects to be in the pytorch root folder
 if [[ ! -d 'test' || ! -f 'test/run_test.py' ]]; then
-    echo "run_tests.sh expects to be run from the Pytorch root directory " \
+    echo "builder/test.sh expects to be run from the Pytorch root directory " \
         "but I'm actually in $(pwd)"
    exit 2
 fi
--- a/.ci/pytorch/smoke_test/smoke_test.py
+++ b/.ci/pytorch/smoke_test/smoke_test.py
@ -109,10 +109,8 @@ def check_version(package: str) -> None:
                            {release_matrix[module['name']]} for channel {channel}. But its {module_version}"
                    )
                else:
-                    print(
-                        f"{module['name']} version actual: {module_version} expected: \
-                        {release_matrix[module['name']]} for channel {channel}."
-                    )
+                    print(f"{module['name']} version actual: {module_version} expected: \
+                        {release_matrix[module['name']]} for channel {channel}.")

    else:
        print(f"Skip version check for channel {channel} as stable version is None")
@ -341,7 +339,7 @@ def smoke_test_modules():
                print(f"Output: \n{output}\n")


-def parse_args():
+def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--package",
@ -364,16 +362,9 @@ def parse_args():
        choices=["enabled", "disabled"],
        default="enabled",
    )
-    return parser.parse_args()
-
-
-def main() -> None:
-    options = parse_args()
+    options = parser.parse_args()
    print(f"torch: {torch.__version__}")
    print(torch.__config__.parallel_info())
-    # All PyTorch binary builds should be built with OpenMP
-    if not torch.backends.openmp.is_available():
-        raise RuntimeError("PyTorch must be built with OpenMP support")

    check_version(options.package)
    smoke_test_conv2d()
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@ -4,7 +4,7 @@
 # (This is set by default in the Docker images we build, so you don't
 # need to set it yourself.

-set -ex -o pipefail
+set -ex

 # Suppress ANSI color escape sequences
 export TERM=vt100
@ -12,9 +12,9 @@ export TERM=vt100
 # shellcheck source=./common.sh
 source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

-# Do not change workspace permissions for ROCm and s390x CI jobs
+# Do not change workspace permissions for ROCm CI jobs
 # as it can leave workspace with bad permissions for cancelled jobs
-if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *rocm* && -d /var/lib/jenkins/workspace ]]; then
  # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
  WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
  cleanup_workspace() {
@ -86,13 +86,6 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
  export VALGRIND=OFF
 fi

-
-if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then
-  # There are additional warnings on s390x, maybe due to newer gcc.
-  # Skip this check for now
-  export VALGRIND=OFF
-fi
-
 if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]] || [[ "${CONTINUE_THROUGH_ERROR}" == "1" ]]; then
  # When rerunning disable tests, do not generate core dumps as it could consume
  # the runner disk space when crashed tests are run multiple times. Running out
@ -136,7 +129,7 @@ if [[ "$TEST_CONFIG" == 'default' ]]; then
 fi

 if [[ "$TEST_CONFIG" == 'distributed' ]] && [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
-  export HIP_VISIBLE_DEVICES=0,1,2,3
+  export HIP_VISIBLE_DEVICES=0,1
 fi

 if [[ "$TEST_CONFIG" == 'slow' ]]; then
@ -160,8 +153,6 @@ elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
  export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
  # setting PYTHON_TEST_EXTRA_OPTION
  export PYTHON_TEST_EXTRA_OPTION="--xpu"
-  # Disable sccache for xpu test due to flaky issue https://github.com/pytorch/pytorch/issues/143585
-  sudo rm -rf /opt/cache
 fi

 if [[ "$TEST_CONFIG" == *crossref* ]]; then
@ -322,7 +313,6 @@ test_dynamo_wrapped_shard() {
    --exclude-jit-executor \
    --exclude-distributed-tests \
    --exclude-torch-export-tests \
-    --exclude-aot-dispatch-tests \
    --shard "$1" "$NUM_TEST_SHARDS" \
    --verbose \
    --upload-artifacts-while-running
@ -336,7 +326,7 @@ test_inductor_distributed() {
  python test/run_test.py -i inductor/test_aot_inductor.py -k test_non_default_cuda_device --verbose
  python test/run_test.py -i inductor/test_aot_inductor.py -k test_replicate_on_devices --verbose
  python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
-  python test/run_test.py -i distributed/tensor/test_dtensor_compile.py --verbose
+  python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
  python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
  python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
@ -389,29 +379,15 @@ test_inductor_aoti() {
  CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
 }

-test_inductor_cpp_wrapper_shard() {
-  if [[ -z "$NUM_TEST_SHARDS" ]]; then
-    echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
-    exit 1
-  fi
-
+test_inductor_cpp_wrapper() {
  export TORCHINDUCTOR_CPP_WRAPPER=1
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

-  if [[ "$1" -eq "2" ]]; then
-    # For now, manually put the opinfo tests in shard 2, and all other tests in
-    # shard 1.  Test specific things triggering past bugs, for now.
-    python test/run_test.py \
-      --include inductor/test_torchinductor_opinfo \
-      -k 'linalg or to_sparse' \
-      --verbose
-    exit
-  fi
+  # Run certain inductor unit tests with cpp wrapper. In the end state, we should be able to run all the inductor
+  # unit tests with cpp wrapper.
+  python test/run_test.py --include inductor/test_torchinductor.py --verbose

-  # Run certain inductor unit tests with cpp wrapper. In the end state, we
-  # should be able to run all the inductor unit tests with cpp_wrapper.
-  python test/run_test.py --include inductor/test_torchinductor --verbose

  # Run inductor benchmark tests with cpp wrapper.
  # Skip benchmark tests if it's in rerun-disabled-mode.
@ -541,7 +517,7 @@ test_perf_for_dashboard() {
            --dynamic-batch-only "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_${device}_${target}.csv"
      fi
-      if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]]; then
+      if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
        TORCHINDUCTOR_CPP_WRAPPER=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_${device}_${target}.csv"
@ -917,20 +893,10 @@ test_libtorch_api() {
  else
    # Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy
    OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
-
-    # On s390x, pytorch is built without llvm.
-    # Even if it would be built with llvm, llvm currently doesn't support used features on s390x and
-    # test fails with errors like:
-    # JIT session error: Unsupported target machine architecture in ELF object pytorch-jitted-objectbuffer
-    # unknown file: Failure
-    # C++ exception with description "valOrErr INTERNAL ASSERT FAILED at "/var/lib/jenkins/workspace/torch/csrc/jit/tensorexpr/llvm_jit.h":34, please report a bug to PyTorch. Unexpected failure in LLVM JIT: Failed to materialize symbols: { (main, { func }) }
-    if [[ "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
-      python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
-    fi
+    python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
  fi

-  # quantization is not fully supported on s390x yet
-  if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* && "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
+  if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* ]]; then
    # NB: This test is not under TORCH_BIN_DIR but under BUILD_BIN_DIR
    export CPP_TESTS_DIR="${BUILD_BIN_DIR}"
    python test/run_test.py --cpp --verbose -i cpp/static_runtime_test
@ -1277,7 +1243,7 @@ EOF
 }

 test_bazel() {
-  set -e -o pipefail
+  set -e

  # bazel test needs sccache setup.
  # shellcheck source=./common-build.sh
@ -1531,7 +1497,7 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
  install_torchaudio cuda
  install_torchvision
  checkout_install_torchbench hf_T5 llama moco
-  PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
+  PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper
 elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
  install_torchvision
  test_inductor_shard "${SHARD_NUMBER}"
--- a/.ci/pytorch/win-build.sh
+++ b/.ci/pytorch/win-build.sh
@ -38,7 +38,7 @@ if [[ $PYLONG_API_CHECK == 0 ]]; then
  echo "PyLong_AsUnsignedLong -> THPUtils_unpackUInt32 / THPUtils_unpackUInt64"
  exit 1
 fi
-set -ex -o pipefail
+set -ex

 "$SCRIPT_HELPERS_DIR"/build_pytorch.bat

--- a/.ci/pytorch/win-test-helpers/build_pytorch.bat
+++ b/.ci/pytorch/win-test-helpers/build_pytorch.bat
@ -26,8 +26,7 @@ if not errorlevel 0 goto fail

 if "%USE_XPU%"=="1" (
  :: Install xpu support packages
-  set CUDA_VERSION=xpu
-  call %SCRIPT_HELPERS_DIR%\..\windows\internal\xpu_install.bat
+  call %INSTALLER_DIR%\install_xpu.bat
  if errorlevel 1 exit /b 1
 )

--- a/.ci/pytorch/win-test-helpers/installation-helpers/install_xpu.bat
+++ b/.ci/pytorch/win-test-helpers/installation-helpers/install_xpu.bat
@ -0,0 +1,114 @@
+@echo on
+REM Description: Install Intel Support Packages on Windows
+REM BKM reference: https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
+
+set XPU_INSTALL_MODE=%~1
+if "%XPU_INSTALL_MODE%"=="" goto xpu_bundle_install_start
+if "%XPU_INSTALL_MODE%"=="bundle" goto xpu_bundle_install_start
+if "%XPU_INSTALL_MODE%"=="driver" goto xpu_driver_install_start
+if "%XPU_INSTALL_MODE%"=="all" goto xpu_driver_install_start
+
+:arg_error
+
+echo Illegal XPU installation mode. The value can be "bundle"/"driver"/"all"
+echo If keep the value as space, will use default "bundle" mode
+exit /b 1
+
+:xpu_driver_install_start
+:: TODO Need more testing for driver installation
+set XPU_DRIVER_LINK=https://downloadmirror.intel.com/830975/gfx_win_101.5972.exe
+curl -o xpu_driver.exe --retry 3 --retry-all-errors -k %XPU_DRIVER_LINK%
+echo "XPU Driver installing..."
+start /wait "Intel XPU Driver Installer" "xpu_driver.exe"
+if errorlevel 1 exit /b 1
+del xpu_driver.exe
+if "%XPU_INSTALL_MODE%"=="driver" goto xpu_install_end
+
+:xpu_bundle_install_start
+
+set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
+set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-for-pytorch-gpu-dev_p_0.5.3.37_offline.exe
+set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.intel-for-pytorch-gpu-dev.product
+set XPU_BUNDLE_VERSION=0.5.3+31
+set XPU_BUNDLE_INSTALLED=0
+set XPU_BUNDLE_UNINSTALL=0
+set XPU_EXTRA_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-pti-dev_p_0.9.0.37_offline.exe
+set XPU_EXTRA_PRODUCT_NAME=intel.oneapi.win.intel-pti-dev.product
+set XPU_EXTRA_VERSION=0.9.0+36
+set XPU_EXTRA_INSTALLED=0
+set XPU_EXTRA_UNINSTALL=0
+
+if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.0] (
+    set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/efc86abd-cb77-452e-a03f-a741895b8ece/intel-deep-learning-essentials-2025.0.0.336_offline.exe
+    set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
+    set XPU_BUNDLE_VERSION=2025.0.0+335
+    set XPU_BUNDLE_INSTALLED=0
+    set XPU_BUNDLE_UNINSTALL=0
+    set XPU_EXTRA_URL=NULL
+    set XPU_EXTRA_PRODUCT_NAME=intel.oneapi.win.compiler.product
+    set XPU_EXTRA_VERSION=2025.0.1+1226
+    set XPU_EXTRA_INSTALLED=0
+    set XPU_EXTRA_UNINSTALL=0
+)
+
+:: Check if XPU bundle is target version or already installed
+if exist "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" goto xpu_bundle_ver_check
+goto xpu_bundle_install
+
+:xpu_bundle_ver_check
+
+"%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --list-products > xpu_bundle_installed_ver.log
+
+for /f "tokens=1,2" %%a in (xpu_bundle_installed_ver.log) do (
+    if "%%a"=="%XPU_BUNDLE_PRODUCT_NAME%" (
+        echo %%a Installed Version: %%b
+        set XPU_BUNDLE_INSTALLED=1
+        if not "%XPU_BUNDLE_VERSION%"=="%%b" (
+            start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
+            set XPU_BUNDLE_UNINSTALL=1
+        )
+    )
+    if "%%a"=="%XPU_EXTRA_PRODUCT_NAME%" (
+        echo %%a Installed Version: %%b
+        set XPU_EXTRA_INSTALLED=1
+        if not "%XPU_EXTRA_VERSION%"=="%%b" (
+            start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
+            set XPU_EXTRA_UNINSTALL=1
+        )
+    )
+    if not "%%b" == "Version" if not [%%b]==[] if not "%%a"=="%XPU_BUNDLE_PRODUCT_NAME%" if not "%%a"=="%XPU_EXTRA_PRODUCT_NAME%" (
+        echo "Uninstalling...."
+        start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
+    )
+)
+if errorlevel 1 exit /b 1
+if exist xpu_bundle_installed_ver.log del xpu_bundle_installed_ver.log
+if exist uninstall_bundle rmdir /s /q uninstall_bundle
+if "%XPU_BUNDLE_INSTALLED%"=="0" goto xpu_bundle_install
+if "%XPU_BUNDLE_UNINSTALL%"=="1" goto xpu_bundle_install
+
+:xpu_extra_check
+
+if "%XPU_EXTRA_URL%"=="NULL" goto xpu_install_end
+if "%XPU_EXTRA_INSTALLED%"=="0" goto xpu_extra_install
+if "%XPU_EXTRA_UNINSTALL%"=="1" goto xpu_extra_install
+goto xpu_install_end
+
+:xpu_bundle_install
+
+curl -o xpu_bundle.exe --retry 3 --retry-all-errors -k %XPU_BUNDLE_URL%
+echo "XPU Bundle installing..."
+start /wait "Intel Pytorch Bundle Installer" "xpu_bundle.exe" --action=install --eula=accept --silent --log-dir install_bundle
+if errorlevel 1 exit /b 1
+del xpu_bundle.exe
+goto xpu_extra_check
+
+:xpu_extra_install
+
+curl -o xpu_extra.exe --retry 3 --retry-all-errors -k %XPU_EXTRA_URL%
+echo "Intel XPU EXTRA installing..."
+start /wait "Intel XPU EXTRA Installer" "xpu_extra.exe" --action=install --eula=accept --silent --log-dir install_bundle
+if errorlevel 1 exit /b 1
+del xpu_extra.exe
+
+:xpu_install_end
--- a/.ci/pytorch/win-test.sh
+++ b/.ci/pytorch/win-test.sh
@ -1,5 +1,5 @@
 #!/bin/bash
-set -ex -o pipefail
+set -ex

 SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
 # shellcheck source=./common.sh
@ -41,7 +41,7 @@ python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==
 python -m pip install z3-solver==4.12.2.0

 # Install tlparse for test\dynamo\test_structured_trace.py UTs.
-python -m pip install tlparse==0.3.30
+python -m pip install tlparse==0.3.25

 # Install parameterized
 python -m pip install parameterized==0.8.1
--- a/.ci/pytorch/windows/internal/xpu_install.bat
+++ b/.ci/pytorch/windows/internal/xpu_install.bat
@ -7,9 +7,6 @@ if not "%CUDA_VERSION%" == "xpu" (
    exit /b 0
 )

-set SRC_DIR=%NIGHTLIES_PYTORCH_ROOT%
-if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build"
-
 set XPU_INSTALL_MODE=%~1
 if "%XPU_INSTALL_MODE%"=="" goto xpu_bundle_install_start
 if "%XPU_INSTALL_MODE%"=="bundle" goto xpu_bundle_install_start
@ -104,14 +101,6 @@ goto xpu_install_end

 :xpu_bundle_install

-:: Install Level Zero SDK
-set XPU_EXTRA_LZ_URL=https://github.com/oneapi-src/level-zero/releases/download/v1.14.0/level-zero-sdk_1.14.0.zip
-curl -k -L %XPU_EXTRA_LZ_URL% --output "%SRC_DIR%\temp_build\level_zero_sdk.zip"
-echo "Installing level zero SDK..."
-7z x "%SRC_DIR%\temp_build\level_zero_sdk.zip" -o"%SRC_DIR%\temp_build\level_zero"
-set "INCLUDE=%SRC_DIR%\temp_build\level_zero\include;%INCLUDE%"
-
-:: Install Bundle
 curl -o xpu_bundle.exe --retry 3 --retry-all-errors -k %XPU_BUNDLE_URL%
 echo "XPU Bundle installing..."
 start /wait "Intel Pytorch Bundle Installer" "xpu_bundle.exe" --action=install --eula=accept --silent --log-dir install_bundle
--- a/.ci/wheel/build_wheel.sh
+++ b/.ci/wheel/build_wheel.sh
@ -226,6 +226,26 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
    # Copy the whl to a final destination before tests are run
    echo "Renaming Wheel file: $wheel_filename_gen to $wheel_filename_new"
    cp "$whl_tmp_dir/$wheel_filename_gen" "$PYTORCH_FINAL_PACKAGE_DIR/$wheel_filename_new"
+
+    ##########################
+    # now test the binary, unless it's cross compiled arm64
+    if [[ -z "$CROSS_COMPILE_ARM64" ]]; then
+        pip uninstall -y "$TORCH_PACKAGE_NAME" || true
+        pip uninstall -y "$TORCH_PACKAGE_NAME" || true
+
+        # Create new "clean" conda environment for testing
+        conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "test_conda_env" python="$desired_python"
+        conda activate test_conda_env
+
+        pip install "$PYTORCH_FINAL_PACKAGE_DIR/$wheel_filename_new" -v
+
+        echo "$(date) :: Running tests"
+        # TODO: Add real tests, as run_test.sh from builder is a glorified no-op
+        # pushd "$pytorch_rootdir"
+        # "${SOURCE_DIR}/../run_tests.sh" 'wheel' "$desired_python" 'cpu'
+        # popd
+        echo "$(date) :: Finished tests"
+    fi
 else
    pushd "$pytorch_rootdir"

--- a/.circleci/codegen_validation/normalize_yaml_fragment.py
+++ b/.circleci/codegen_validation/normalize_yaml_fragment.py
@ -7,7 +7,7 @@ import yaml


 # Need to import modules that lie on an upward-relative path
-sys.path.append(os.path.dirname(sys.path[0]))
+sys.path.append(os.path.join(sys.path[0], ".."))

 import cimodel.lib.miniyaml as miniyaml

--- a/.circleci/scripts/binary_macos_build.sh
+++ b/.circleci/scripts/binary_macos_build.sh
@ -0,0 +1,11 @@
+#!/bin/bash
+set -eux -o pipefail
+
+source "${BINARY_ENV_FILE:-/Users/distiller/project/env}"
+mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"
+
+# Build
+export USE_PYTORCH_METAL_EXPORT=1
+export USE_COREML_DELEGATE=1
+export TORCH_PACKAGE_NAME="$(echo $TORCH_PACKAGE_NAME | tr '-' '_')"
+"${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh"
--- a/.circleci/scripts/binary_upload.sh
+++ b/.circleci/scripts/binary_upload.sh
@ -68,29 +68,17 @@ s3_upload() {
  local pkg_type
  extension="$1"
  pkg_type="$2"
-  s3_key_prefix="${pkg_type}/${UPLOAD_CHANNEL}"
+  s3_root_dir="${UPLOAD_BUCKET}/${pkg_type}/${UPLOAD_CHANNEL}"
  if [[ -z ${UPLOAD_SUBFOLDER:-} ]]; then
-    s3_upload_dir="${UPLOAD_BUCKET}/${s3_key_prefix}/"
+    s3_upload_dir="${s3_root_dir}/"
  else
-    s3_key_prefix="${s3_key_prefix}/${UPLOAD_SUBFOLDER}"
-    s3_upload_dir="${UPLOAD_BUCKET}/${s3_key_prefix}/"
+    s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/"
  fi
  (
    for pkg in ${PKG_DIR}/*.${extension}; do
      (
        set -x
        ${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}"
-        if [[ ${pkg_type} == "whl" ]]; then
-          dry_run_arg="--dry-run"
-          if [[ "${DRY_RUN}" = "disabled" ]]; then
-            dry_run_arg=""
-          fi
-          uv run scripts/release/upload_metadata_file.py \
-            --package "${pkg}" \
-            --bucket "${UPLOAD_BUCKET}" \
-            --key-prefix "${s3_key_prefix}" \
-            ${dry_run_arg}
-        fi
      )
    done
  )
@ -98,7 +86,7 @@ s3_upload() {

 # Install dependencies (should be a no-op if previously installed)
 conda install -yq anaconda-client
-pip install -q awscli uv
+pip install -q awscli

 case "${PACKAGE_TYPE}" in
  conda)
--- a/.clang-format
+++ b/.clang-format
@ -106,8 +106,6 @@ StatementMacros:
  - C10_DEFINE_int32
  - C10_DEFINE_int64
  - C10_DEFINE_string
-  - C10_DEFINE_REGISTRY_WITHOUT_WARNING
-  - C10_REGISTER_CREATOR
  - DEFINE_BINARY
  - PyObject_HEAD
  - PyObject_VAR_HEAD
--- a/.clang-tidy
+++ b/.clang-tidy
@ -1,9 +1,8 @@
 ---
 # NOTE there must be no spaces before the '-', so put the comma last.
-# The check bugprone-unchecked-optional-access is also turned on.
-# Note that it can cause clang-tidy to hang randomly. The tracking issue
+# The check bugprone-unchecked-optional-access is also turned off atm
+# because it causes clang-tidy to hang randomly. The tracking issue
 # can be found at https://github.com/llvm/llvm-project/issues/69369.
-# When that happens, we can disable it on the problematic code by NOLINT.
 InheritParentConfig: true
 Checks: '
 bugprone-*,
@ -13,10 +12,7 @@ bugprone-*,
 -bugprone-lambda-function-name,
 -bugprone-reserved-identifier,
 -bugprone-swapped-arguments,
-clang-analyzer-core.*,
-clang-analyzer-cplusplus.*,
-clang-analyzer-nullability.*,
-clang-analyzer-deadcode.*,
+-bugprone-unchecked-optional-access,
 clang-diagnostic-missing-prototypes,
 cppcoreguidelines-*,
 -cppcoreguidelines-avoid-do-while,
@ -59,11 +55,10 @@ readability-container-size-empty,
 readability-delete-null-pointer,
 readability-duplicate-include
 readability-misplaced-array-index,
-readability-redundant*
+readability-redundant-function-ptr-dereference,
+readability-redundant-smartptr-get,
 readability-simplify-subscript-expr,
 readability-string-compare,
-readability-redundant-access-specifiers,
-readability-redundant-control-flow,
 '
 HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
 WarningsAsErrors: '*'
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@ -5,7 +5,7 @@ body:
 - type: markdown
  attributes:
    value: >
-      #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/pytorch/pytorch/issues?q=is%3Aissue+sort%3Acreated-desc+). Note: Please write your bug report in English to ensure it can be understood and addressed by the development team.
+      #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/pytorch/pytorch/issues?q=is%3Aissue+sort%3Acreated-desc+).
 - type: textarea
  attributes:
    label: 🐛 Describe the bug
--- a/.github/ISSUE_TEMPLATE/documentation.yml
+++ b/.github/ISSUE_TEMPLATE/documentation.yml
@ -2,10 +2,6 @@ name: 📚 Documentation
 description: Report an issue related to https://pytorch.org/docs/stable/index.html

 body:
- type: markdown
-  attributes:
-    value: >
-      #### Note: Please report your documentation issue in English to ensure it can be understood and addressed by the development team.
 - type: textarea
  attributes:
    label: 📚 The doc issue
--- a/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@ -2,10 +2,6 @@ name: 🚀 Feature request
 description: Submit a proposal/request for a new PyTorch feature

 body:
- type: markdown
-  attributes:
-    value: >
-      #### Note: Please write your feature request in English to ensure it can be understood and addressed by the development team.
 - type: textarea
  attributes:
    label: 🚀 The feature, motivation and pitch
--- a/.github/ISSUE_TEMPLATE/pt2-bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/pt2-bug-report.yml
@ -3,10 +3,6 @@ description: Create a report to help us reproduce and fix the bug
 labels: ["oncall: pt2"]

 body:
-  - type: markdown
-    attributes:
-      value: >
-      #### Note: Please write your bug report in English to ensure it can be understood and addressed by the development team.
  - type: markdown
    attributes:
      value: >
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -42,10 +42,8 @@ self-hosted-runner:
    - windows.8xlarge.nvidia.gpu
    - windows.8xlarge.nvidia.gpu.nonephemeral
    - windows.g5.4xlarge.nvidia.gpu
-    # Organization-wide AMD hosted runners
+    # Organization-wide AMD hosted MI300 runners
    - linux.rocm.gpu
-    - linux.rocm.gpu.2
-    - linux.rocm.gpu.4
    # Repo-specific Apple hosted  runners
    - macos-m1-ultra
    - macos-m2-14
--- a/.github/actions/checkout-pytorch/action.yml
+++ b/.github/actions/checkout-pytorch/action.yml
@ -41,10 +41,10 @@ runs:
        mkdir "${GITHUB_WORKSPACE}"

    - name: Checkout PyTorch
-      uses: actions/checkout@v4
+      uses: malfet/checkout@silent-checkout
      with:
        ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
        # --depth=1 for speed, manually fetch history and other refs as necessary
        fetch-depth: ${{ inputs.fetch-depth }}
        submodules: ${{ inputs.submodules }}
-        show-progress: false
+        quiet-checkout: true
--- a/.github/actions/diskspace-cleanup/action.yml
+++ b/.github/actions/diskspace-cleanup/action.yml
@ -17,10 +17,6 @@ runs:
        set -ex
        diskspace_cutoff=${{ inputs.diskspace-cutoff }}
        docker_root_dir=$(docker info -f '{{.DockerRootDir}}')
-        if [ ! -d "$docker_root_dir" ]; then
-            echo "Docker root directory ($docker_root_dir) does not exist. Skipping disk space check."
-            exit 0
-        fi
        diskspace=$(df -H --output=pcent ${docker_root_dir} | sed -n 2p | sed 's/%//' | sed 's/ //')
        msg="Please file an issue on pytorch/pytorch reporting the faulty runner. Include a link to the runner logs so the runner can be identified"
        if [[ "$diskspace" -ge "$diskspace_cutoff" ]] ; then
--- a/.github/actions/setup-rocm/action.yml
+++ b/.github/actions/setup-rocm/action.yml
@ -5,6 +5,20 @@ description: Set up ROCm host for CI
 runs:
  using: composite
  steps:
+    - name: Set DOCKER_HOST
+      shell: bash
+      run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
+
+    - name: Remove leftover Docker config file
+      shell: bash
+      continue-on-error: true
+      run: |
+        set -ex
+
+        cat ~/.docker/config.json || true
+        # https://stackoverflow.com/questions/64455468/error-when-logging-into-ecr-with-docker-login-error-saving-credentials-not
+        rm -f ~/.docker/config.json
+
    - name: Stop all running docker containers
      if: always()
      shell: bash
@ -24,12 +38,6 @@ runs:
        cat /opt/rocm/.info/version || true
        whoami

-    - name: Runner health check amdgpu info
-      if: always()
-      shell: bash
-      run: |
-        dpkg -l | grep -E "  amdgpu"
-
    - name: Runner health check rocm-smi
      if: always()
      shell: bash
@ -60,7 +68,7 @@ runs:
        fi

    - name: Runner diskspace health check
-      uses: pytorch/pytorch/.github/actions/diskspace-cleanup@main
+      uses: ./.github/actions/diskspace-cleanup
      if: always()

    - name: Runner health check disconnect on failure
@ -69,38 +77,14 @@ runs:
      run: |
        killall runsvc.sh

-    - name: Setup useful environment variables
-      shell: bash
-      run: |
-        RUNNER_ARTIFACT_DIR="${RUNNER_TEMP}/artifacts"
-        rm -rf "${RUNNER_ARTIFACT_DIR}"
-        mkdir -p "${RUNNER_ARTIFACT_DIR}"
-        echo "RUNNER_ARTIFACT_DIR=${RUNNER_ARTIFACT_DIR}" >> "${GITHUB_ENV}"
-
-        RUNNER_TEST_RESULTS_DIR="${RUNNER_TEMP}/test-results"
-        rm -rf "${RUNNER_TEST_RESULTS_DIR}"
-        mkdir -p "${RUNNER_TEST_RESULTS_DIR}"
-        echo "RUNNER_TEST_RESULTS_DIR=${RUNNER_TEST_RESULTS_DIR}" >> "${GITHUB_ENV}"
-
-        RUNNER_DOCS_DIR="${RUNNER_TEMP}/docs"
-        rm -rf "${RUNNER_DOCS_DIR}"
-        mkdir -p "${RUNNER_DOCS_DIR}"
-        echo "RUNNER_DOCS_DIR=${RUNNER_DOCS_DIR}" >> "${GITHUB_ENV}"
-
    - name: Preserve github env variables for use in docker
      shell: bash
      run: |
-        env | grep '^GITHUB' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"
-        env | grep '^CI' >> "${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}"
+        env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
+        env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"

    - name: ROCm set GPU_FLAG
      shell: bash
      run: |
        # All GPUs are visible to the runner; visibility, if needed, will be set by run_test.py.
-        # Add render group for container creation.
-        render_gid=`cat /etc/group | grep render | cut -d: -f3`
-        # The --group-add daemon and --group-add bin are needed in the Ubuntu 24.04 and Almalinux OSs respectively.
-        # This is due to the device files (/dev/kfd & /dev/dri) being owned by video group on bare metal.
-        # This video group ID maps to subgid 1 inside the docker image due to the /etc/subgid entries.
-        # The group name corresponding to group ID 1 can change depending on the OS, so both are necessary.
-        echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device /dev/dri --group-add video --group-add $render_gid --group-add daemon --group-add bin --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --network=host" >> "${GITHUB_ENV}"
+        echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
--- a/.github/actions/test-pytorch-binary/action.yml
+++ b/.github/actions/test-pytorch-binary/action.yml
@ -13,6 +13,7 @@ runs:
        container_name=$(docker run \
          ${GPU_FLAG:-} \
          -e BINARY_ENV_FILE \
+          -e BUILDER_ROOT \
          -e BUILD_ENVIRONMENT \
          -e DESIRED_CUDA \
          -e DESIRED_DEVTOOLSET \
--- a/.github/ci_commit_pins/audio.txt
+++ b/.github/ci_commit_pins/audio.txt
@ -1 +1 @@
-b6d4675c7aedc53ba04f3f55786aac1de32be6b4
+332760d4b300f00a0d862e3cfe1495db3b1a14f9
--- a/.github/ci_commit_pins/xla.txt
+++ b/.github/ci_commit_pins/xla.txt
@ -1 +1 @@
-b2b890e962f5fb6f481e5da2eb4a43bb990d0f1b
+r2.6
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@ -30,9 +30,9 @@
 - torch/fx/experimental/sym_node.py
 - torch/fx/experimental/validator.py
 - torch/fx/experimental/proxy_tensor.py
- test/distributed/tensor/test_dtensor_compile.py
+- test/distributed/_tensor/test_dtensor_compile.py
 - test/distributed/tensor/parallel/test_fsdp_2d_parallel.py
- torch/distributed/tensor/**
+- torch/distributed/_tensor/**
 - torch/distributed/fsdp/**
 - torch/csrc/inductor/**
 - torch/csrc/dynamo/**
--- a/.github/requirements-gha-cache.txt
+++ b/.github/requirements-gha-cache.txt
@ -5,7 +5,7 @@
 #   functorch/docs/requirements.txt
 #   .ci/docker/requirements-ci.txt
 boto3==1.35.42
-jinja2==3.1.5
+jinja2==3.1.4
 lintrunner==0.10.7
 ninja==1.10.0.post1
 nvidia-ml-py==11.525.84
--- a/.github/requirements/pip-requirements-macOS.txt
+++ b/.github/requirements/pip-requirements-macOS.txt
@ -1,6 +1,6 @@
 boto3==1.35.42
 hypothesis==6.56.4
-expecttest==0.3.0
+expecttest==0.2.1
 fbscribelogger==0.1.7
 librosa>=0.6.2
 mpmath==1.3.0
--- a/.github/scripts/delete_old_branches.py
+++ b/.github/scripts/delete_old_branches.py
@ -22,7 +22,7 @@ TOKEN = os.environ["GITHUB_TOKEN"]
 if not TOKEN:
    raise Exception("GITHUB_TOKEN is not set")  # noqa: TRY002

-REPO_ROOT = Path(__file__).parents[2]
+REPO_ROOT = Path(__file__).parent.parent.parent

 # Query for all PRs instead of just closed/merged because it's faster
 GRAPHQL_ALL_PRS_BY_UPDATED_AT = """
--- a/.github/scripts/ensure_actions_will_cancel.py
+++ b/.github/scripts/ensure_actions_will_cancel.py
@ -6,7 +6,7 @@ from pathlib import Path
 import yaml


-REPO_ROOT = Path(__file__).resolve().parents[2]
+REPO_ROOT = Path(__file__).resolve().parent.parent.parent
 WORKFLOWS = REPO_ROOT / ".github" / "workflows"
 EXPECTED_GROUP_PREFIX = (
    "${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}"
--- a/.github/scripts/filter_test_configs.py
+++ b/.github/scripts/filter_test_configs.py
@ -39,9 +39,9 @@ SUPPORTED_PERIODICAL_MODES: Dict[str, Callable[[Optional[str]], bool]] = {
 }

 # The link to the published list of disabled jobs
-DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json"
+DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=pQg1WJZKNqoisT5kAGG9Wmbuns5zBdBc"
 # and unstable jobs
-UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json"
+UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=ddADM6lf9NqVTA0APn69zl3M7nMda4DH"

 # Some constants used to handle disabled and unstable jobs
 JOB_NAME_SEP = "/"
@ -562,7 +562,7 @@ def main() -> None:

    # If the tag matches, we can get the PR number from it, this is from ciflow
    # workflow dispatcher
-    tag_regex = re.compile(r"^ciflow/[\w\-]+/(?P<pr_number>\d+)$")
+    tag_regex = re.compile(r"^ciflow/\w+/(?P<pr_number>\d+)$")

    labels = set()
    if pr_number:
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@ -20,8 +20,7 @@ CUDA_ARCHES = ["11.8", "12.4", "12.6"]
 CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.4": "12.4.1", "12.6": "12.6.3"}
 CUDA_ARCHES_CUDNN_VERSION = {"11.8": "9", "12.4": "9", "12.6": "9"}

-# NOTE: Also update the ROCm sources in tools/nightly.py when changing this list
-ROCM_ARCHES = ["6.2.4", "6.3"]
+ROCM_ARCHES = ["6.1", "6.2.4"]

 XPU_ARCHES = ["xpu"]

@ -94,7 +93,7 @@ def get_nccl_submodule_version() -> str:
    from pathlib import Path

    nccl_version_mk = (
-        Path(__file__).absolute().parents[2]
+        Path(__file__).absolute().parent.parent.parent
        / "third_party"
        / "nccl"
        / "nccl"
@ -159,16 +158,15 @@ def arch_type(arch_version: str) -> str:
 DEFAULT_TAG = os.getenv("RELEASE_VERSION_TAG", "main")

 WHEEL_CONTAINER_IMAGES = {
-    **{
-        gpu_arch: f"pytorch/manylinux2_28-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
-        for gpu_arch in CUDA_ARCHES
-    },
+    "11.8": f"pytorch/manylinux-builder:cuda11.8-{DEFAULT_TAG}",
+    "12.4": f"pytorch/manylinux-builder:cuda12.4-{DEFAULT_TAG}",
+    "12.6": f"pytorch/manylinux2_28-builder:cuda12.6-{DEFAULT_TAG}",
    **{
        gpu_arch: f"pytorch/manylinux2_28-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    "xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}",
-    "cpu": f"pytorch/manylinux2_28-builder:cpu-{DEFAULT_TAG}",
+    "cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
    "cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
    "cpu-aarch64": f"pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
    "cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}",
@ -347,7 +345,6 @@ def generate_wheels_matrix(
            if (
                gpu_arch_type in ["xpu", "cpu-s390x"]
                or os == "macos-arm64"
-                or os == "linux-aarch64"
                or os == "windows"
            ) and python_version == "3.13t":
                continue
@ -377,7 +374,13 @@ def generate_wheels_matrix(
                            gpu_arch_type, gpu_arch_version
                        ),
                        "use_split_build": "True" if use_split_build else "False",
-                        "devtoolset": "cxx11-abi",
+                        "devtoolset": (
+                            "cxx11-abi"
+                            if (
+                                arch_version == "cuda-aarch64" or arch_version == "12.6"
+                            )
+                            else ""
+                        ),
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
                        "pytorch_extra_install_requirements": (
@ -422,8 +425,8 @@ def generate_wheels_matrix(
                        "use_split_build": "True" if use_split_build else "False",
                        "devtoolset": (
                            "cxx11-abi"
-                            if (arch_version in ["cpu-cxx11-abi", "cpu-aarch64"])
-                            or os == "linux"
+                            if (arch_version in ["cpu-cxx11-abi", "cpu-aarch64", "xpu"])
+                            or gpu_arch_type == "rocm"
                            else ""
                        ),
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
--- a/.github/scripts/gitutils.py
+++ b/.github/scripts/gitutils.py
@ -32,7 +32,7 @@ def get_git_remote_name() -> str:
 def get_git_repo_dir() -> str:
    from pathlib import Path

-    return os.getenv("GIT_REPO_DIR", str(Path(__file__).resolve().parents[2]))
+    return os.getenv("GIT_REPO_DIR", str(Path(__file__).resolve().parent.parent.parent))


 def fuzzy_list_to_dict(items: List[Tuple[str, str]]) -> Dict[str, List[str]]:
--- a/.github/scripts/lint_native_functions.py
+++ b/.github/scripts/lint_native_functions.py
@ -26,7 +26,7 @@ def fn(base: str) -> str:
    return str(base / Path("aten/src/ATen/native/native_functions.yaml"))


-with open(Path(__file__).parents[2] / fn(".")) as f:
+with open(Path(__file__).parent.parent.parent / fn(".")) as f:
    contents = f.read()

 yaml = ruamel.yaml.YAML()  # type: ignore[attr-defined]
--- a/.github/scripts/lintrunner.sh
+++ b/.github/scripts/lintrunner.sh
@ -19,7 +19,7 @@ fi

 # if lintrunner is not installed, install it
 if ! command -v lintrunner &> /dev/null; then
-    python3 -m pip install lintrunner==0.12.7
+    python3 -m pip install lintrunner==0.12.5
 fi

 # This has already been cached in the docker image
--- a/.github/scripts/s390x-ci/self-hosted-builder/actions-runner.Dockerfile
+++ b/.github/scripts/s390x-ci/self-hosted-builder/actions-runner.Dockerfile
@ -1,12 +1,12 @@
 # Self-Hosted IBM Z Github Actions Runner.

 # Temporary image: amd64 dependencies.
-FROM --platform=linux/amd64 docker.io/ubuntu:24.04 as ld-prefix
+FROM docker.io/amd64/ubuntu:23.10 as ld-prefix
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get -y install ca-certificates libicu74 libssl3
+RUN apt-get update && apt-get -y install ca-certificates libicu72 libssl3

 # Main image.
-FROM --platform=linux/s390x docker.io/ubuntu:24.04
+FROM docker.io/s390x/ubuntu:23.10

 # Packages for pytorch building and testing.
 ENV DEBIAN_FRONTEND=noninteractive
--- a/.github/scripts/test_gitutils.py
+++ b/.github/scripts/test_gitutils.py
@ -68,7 +68,7 @@ class TestRetriesDecorator(TestCase):

 class TestGitRepo(TestCase):
    def setUp(self) -> None:
-        repo_dir = BASE_DIR.absolute().parent.parent
+        repo_dir = BASE_DIR.parent.parent.absolute()
        if not (repo_dir / ".git").is_dir():
            raise SkipTest(
                "Can't find git directory, make sure to run this test on real repo checkout"
--- a/.github/scripts/trymerge.py
+++ b/.github/scripts/trymerge.py
@ -669,7 +669,7 @@ def get_ghstack_prs(
        if not open_only or not candidate.is_closed():
            return False
        print(
-            f"Skipping {idx + 1} of {len(rev_list)} PR (#{candidate.pr_num}) as its already been merged"
+            f"Skipping {idx+1} of {len(rev_list)} PR (#{candidate.pr_num}) as its already been merged"
        )
        return True

--- a/.github/templates/common.yml.j2
+++ b/.github/templates/common.yml.j2
@ -5,6 +5,11 @@

 {%- set timeout_minutes = 240 -%}

+# NOTE: If testing pytorch/builder changes you can change this variable to change what pytorch/builder reference
+#       the binary builds will check out
+{%- set builder_repo = "pytorch/builder" -%}
+{%- set builder_branch = "release/2.6" -%}
+
 {%- macro concurrency(build_environment) -%}
 concurrency:
  group: !{{ build_environment }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
@ -31,7 +36,7 @@ concurrency:
 {%- macro setup_ec2_windows() -%}
      !{{ display_ec2_information() }}
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        continue-on-error: true
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -79,7 +84,7 @@ concurrency:

 {%- macro checkout(submodules="recursive", deep_clone=True, directory="", repository="pytorch/pytorch", branch="", checkout_pr_head=True) -%}
      - name: Checkout !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }}
-        uses: actions/checkout@v4
+        uses: malfet/checkout@silent-checkout
        with:
      {%- if branch %}
          ref: !{{ branch }}
@ -97,7 +102,7 @@ concurrency:
      {%- if directory %}
          path: !{{ directory }}
      {%- endif %}
-          show-progress: false
+          quiet-checkout: true
      - name: Clean !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }} checkout
        run: |
          # Remove any artifacts from the previous checkouts
--- a/.github/templates/linux_binary_build_workflow.yml.j2
+++ b/.github/templates/linux_binary_build_workflow.yml.j2
@ -42,6 +42,7 @@ env:
  AWS_DEFAULT_REGION: us-east-1
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: !{{ build_environment }}
+  BUILDER_ROOT: /builder
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
@ -54,7 +55,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -144,9 +145,9 @@ jobs:
        with:
          name: !{{ config["build_name"] }}
          path: "${{ runner.temp }}/artifacts/"
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
      - name: Pull Docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: !{{ config["container_image"] }}
      - name: Test Pytorch binary
@ -165,12 +166,12 @@ jobs:
        with:
          name: !{{ config["build_name"] }}
          path: "${{ runner.temp }}/artifacts/"
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
      - name: ROCm set GPU_FLAG
        run: |
          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
      - name: Pull Docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: !{{ config["container_image"] }}
      - name: Test Pytorch binary
--- a/.github/templates/macos_binary_build_workflow.yml.j2
+++ b/.github/templates/macos_binary_build_workflow.yml.j2
@ -78,7 +78,18 @@ jobs:
          elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
            echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
          fi
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
+      - name: Install sccache (only for non-forked PRs, and pushes to trunk)
+        uses: nick-fields/retry@v3.0.0
+        if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
+        with:
+          timeout_minutes: 5
+          max_attempts: 3
+          retry_wait_seconds: 90
+          command: |
+            sudo curl --retry 3 --retry-all-errors https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
+            sudo chmod +x /usr/local/bin/sccache
+            echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
      - name: Populate binary env
        run: |
          # shellcheck disable=SC1091
@ -88,37 +99,7 @@ jobs:
        run: |
          # shellcheck disable=SC1091
          source "${RUNNER_TEMP}/anaconda/bin/activate"
-          set -eux -o pipefail
-          # shellcheck disable=SC1090
-          source "${BINARY_ENV_FILE:-/Users/distiller/project/env}"
-          mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"
-
-          # Build
-          USE_PYTORCH_METAL_EXPORT=1
-          USE_COREML_DELEGATE=1
-          TORCH_PACKAGE_NAME="${TORCH_PACKAGE_NAME//-/_}"
-          export USE_PYTORCH_METAL_EXPORT
-          export USE_COREML_DELEGATE
-          export TORCH_PACKAGE_NAME
-          "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh"
-{%- if config["package_type"] == "wheel" %}
-      - name: Test PyTorch wheel
-        run: |
-          # shellcheck disable=SC1091
-          source "${RUNNER_TEMP}/anaconda/bin/activate"
-          set -eux -o pipefail
-          # shellcheck disable=SC1090
-          source "${BINARY_ENV_FILE:-/Users/distiller/project/env}"
-          pip uninstall -y "$TORCH_PACKAGE_NAME" || true
-          pip uninstall -y "$TORCH_PACKAGE_NAME" || true
-
-          # Create new "clean" conda environment for testing
-          conda create -yn "test_conda_env" python="$DESIRED_PYTHON"
-          conda activate test_conda_env
-
-          pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v
-          python "${PYTORCH_ROOT}/.ci/pytorch/smoke_test/smoke_test.py" --package torchonly
-{%- endif %}
+          "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
      - uses: actions/upload-artifact@v4.4.0
        if: always()
        with:
--- a/.github/templates/upload.yml.j2
+++ b/.github/templates/upload.yml.j2
@ -7,8 +7,10 @@
 {%- macro binary_env_as_input(config, is_windows=False, include_skip_tests=False) -%}
 {%- if is_windows %}
      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
+      BUILDER_ROOT: ${{ github.workspace }}/builder
 {%- else %}
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
 {%- endif %}
      PACKAGE_TYPE: !{{ config["package_type"] }}
      # TODO: This is a legacy variable that we eventually want to get rid of in
--- a/.github/templates/windows_binary_build_workflow.yml.j2
+++ b/.github/templates/windows_binary_build_workflow.yml.j2
@ -56,7 +56,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -80,7 +80,7 @@ jobs:
    steps:
      !{{ common.setup_ec2_windows() }}
      !{{ set_runner_specific_vars() }}
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
      - name: Populate binary env
        shell: bash
        run: |
@ -121,7 +121,7 @@ jobs:
        with:
          name: !{{ config["build_name"] }}
          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
+      !{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
      - name: Populate binary env
        shell: bash
        run: |
--- a/.github/workflows/_bazel-build-test.yml
+++ b/.github/workflows/_bazel-build-test.yml
@ -47,7 +47,7 @@ jobs:
      reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          fetch-depth: 1
          submodules: false
@ -69,25 +69,25 @@ jobs:
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -97,7 +97,7 @@ jobs:
        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.6
        if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

      - name: Output disk space left
@ -209,5 +209,5 @@ jobs:
          file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6
        if: always()
--- a/.github/workflows/_binary-build-linux.yml
+++ b/.github/workflows/_binary-build-linux.yml
@ -42,6 +42,10 @@ on:
        required: true
        type: string
        description: Root directory for the pytorch/pytorch repository
+      BUILDER_ROOT:
+        required: true
+        type: string
+        description: Root directory for the pytorch/builder repository
      PACKAGE_TYPE:
        required: true
        type: string
@ -94,6 +98,7 @@ jobs:
    timeout-minutes: ${{ inputs.timeout-minutes }}
    env:
      PYTORCH_ROOT: ${{ inputs.PYTORCH_ROOT }}
+      BUILDER_ROOT: ${{ inputs.BUILDER_ROOT }}
      PACKAGE_TYPE: ${{ inputs.PACKAGE_TYPE }}
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
@ -124,6 +129,7 @@ jobs:
        run: |
          {
            echo "PYTORCH_ROOT=${{ env.PYTORCH_ROOT }}"
+            echo "BUILDER_ROOT=${{ env.BUILDER_ROOT }}"
            echo "PACKAGE_TYPE=${{ env.PACKAGE_TYPE }}"
            echo "DESIRED_CUDA=${{ env.DESIRED_CUDA }}"
            echo "GPU_ARCH_VERSION=${{ env.GPU_ARCH_VERSION }}"
@ -153,13 +159,13 @@ jobs:

      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        continue-on-error: true
        with:
          github-secret: ${{ secrets.github-token }}

      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}

@ -187,12 +193,11 @@ jobs:
          fi

      - name: Checkout PyTorch to pytorch dir
-        uses: actions/checkout@v4
+        uses: malfet/checkout@silent-checkout
        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
-          show-progress: false
+          quiet-checkout: true

      - name: Clean PyTorch checkout
        run: |
@ -214,7 +219,7 @@ jobs:

      - name: Pull Docker image
        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ inputs.DOCKER_IMAGE }}

@ -270,7 +275,7 @@ jobs:

      - name: Teardown Linux
        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6

      - name: Chown workspace
        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
--- a/.github/workflows/_binary-test-linux.yml
+++ b/.github/workflows/_binary-test-linux.yml
@ -19,6 +19,10 @@ on:
        required: true
        type: string
        description: Root directory for the pytorch/pytorch repository
+      BUILDER_ROOT:
+        required: true
+        type: string
+        description: Root directory for the pytorch/builder repository
      PACKAGE_TYPE:
        required: true
        type: string
@ -82,6 +86,7 @@ jobs:
    timeout-minutes: 240
    env:
      PYTORCH_ROOT: ${{ inputs.PYTORCH_ROOT }}
+      BUILDER_ROOT: ${{ inputs.BUILDER_ROOT }}
      PACKAGE_TYPE: ${{ inputs.PACKAGE_TYPE }}
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
@ -111,6 +116,7 @@ jobs:
        run: |
          {
            echo "PYTORCH_ROOT=${{ env.PYTORCH_ROOT }}"
+            echo "BUILDER_ROOT=${{ env.BUILDER_ROOT }}"
            echo "PACKAGE_TYPE=${{ env.PACKAGE_TYPE }}"

            echo "DESIRED_CUDA=${{ env.DESIRED_CUDA }}"
@ -136,14 +142,14 @@ jobs:

      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        if: inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        continue-on-error: true
        with:
          github-secret: ${{ secrets.github-token }}

        # Setup the environment
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}

@ -164,11 +170,9 @@ jobs:
          mkdir "${GITHUB_WORKSPACE}"

      - name: Checkout PyTorch to pytorch dir
-        uses: actions/checkout@v4
+        uses: malfet/checkout@silent-checkout
        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
-          show-progress: false
          path: pytorch

      - name: Clean PyTorch checkout
@ -197,12 +201,12 @@ jobs:
          path: "${{ runner.temp }}/artifacts/"

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.6
        if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}

      - name: Pull Docker image
        if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ inputs.DOCKER_IMAGE }}

@ -212,7 +216,7 @@ jobs:

      - name: Teardown Linux
        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6

      - name: Chown workspace
        if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
--- a/.github/workflows/_binary-upload.yml
+++ b/.github/workflows/_binary-upload.yml
@ -15,6 +15,10 @@ on:
        required: false
        type: string
        description: Root directory for the pytorch/pytorch repository. Not actually needed, but currently passing it in since we pass in the same inputs to the reusable workflows of all binary builds
+      BUILDER_ROOT:
+        required: false
+        type: string
+        description: Root directory for the pytorch/builder repository. Not actually needed, but currently passing it in since we pass in the same inputs to the reusable workflows of all binary builds
      PACKAGE_TYPE:
        required: true
        type: string
@ -77,6 +81,7 @@ jobs:
      image: continuumio/miniconda3:4.12.0
    env:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: ${{ inputs.PACKAGE_TYPE }}
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
@ -98,7 +103,7 @@ jobs:
      USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: true

--- a/.github/workflows/_docs.yml
+++ b/.github/workflows/_docs.yml
@ -84,7 +84,7 @@ jobs:
    name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
@ -95,7 +95,7 @@ jobs:

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6

      - name: Setup Linux
        uses: ./.github/actions/setup-linux
@ -110,12 +110,12 @@ jobs:

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
          docker-image-name: ${{ inputs.docker-image }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -222,5 +222,5 @@ jobs:
          s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6
        if: always()
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@ -108,7 +108,7 @@ jobs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -118,7 +118,7 @@ jobs:
      # checkout because when we run this action we don't *have* a local
      # checkout. In other cases you should prefer a local checkout.
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: true

@ -136,7 +136,7 @@ jobs:

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image-name: ${{ inputs.docker-image-name }}
@ -152,7 +152,7 @@ jobs:
          echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -199,10 +199,7 @@ jobs:
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
-          # Use the build environment here to make sure that all build jobs in the same environment
-          # will share the same cache regardless of which workflow they belong. This should improve
-          # the cache usage for jobs in non-pull workflows like periodic, slow, or inductor
-          SCCACHE_S3_KEY_PREFIX: ${{ inputs.build-environment || github.workflow }}
+          SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
          XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
          PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
          TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
@ -219,10 +216,6 @@ jobs:
          if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
            JENKINS_USER=
            USED_IMAGE="${DOCKER_IMAGE_S390X}"
-            # ensure that docker container cleanly exits in 12 hours
-            # if for some reason cleanup action doesn't stop container
-            # when job is cancelled
-            DOCKER_SHELL_CMD="sleep 12h"

            # since some steps are skipped on s390x, if they are necessary, run them here
            env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
@ -230,17 +223,15 @@ jobs:
          else
            JENKINS_USER="--user jenkins"
            USED_IMAGE="${DOCKER_IMAGE}"
-            DOCKER_SHELL_CMD=
          fi

          # Leaving 1GB for the runner and other things
          TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
-          # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
-          # comes from https://github.com/pytorch/test-infra/pull/6058
-          TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3))
+          # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details
+          TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" * 2))

          # detached container should get cleaned up by teardown_ec2_linux
-          # Used for JENKINS_USER and DOCKER_SHELL_CMD, which can be empty
+          # Used for JENKINS_USER, which can be empty
          # shellcheck disable=SC2086
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
@ -271,8 +262,7 @@ jobs:
            ${JENKINS_USER} \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
-            "${USED_IMAGE}" \
-            ${DOCKER_SHELL_CMD}
+            "${USED_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'

@ -330,7 +320,7 @@ jobs:
          build-time: ${{ steps.build.outputs.build_time }}

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6
        if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'

      - name: Cleanup docker
@ -338,5 +328,6 @@ jobs:
        shell: bash
        run: |
          # on s390x stop the container for clean worker stop
-          docker stop -a || true
-          docker kill -a || true
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
--- a/.github/workflows/_linux-test.yml
+++ b/.github/workflows/_linux-test.yml
@ -80,8 +80,8 @@ jobs:
    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
-        if: ${{ !contains(matrix.runner, 'gcp.a100') && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
+        if: ${{ !contains(matrix.runner, 'gcp.a100') }}
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
@ -89,16 +89,15 @@ jobs:
              docker exec -it $(docker container ps --format '{{.ID}}') bash

      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: true

      - name: Setup Linux
        uses: ./.github/actions/setup-linux
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'

      - name: configure aws credentials
-        if : ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
+        if : ${{ inputs.aws-role-to-assume != '' }}
        uses: aws-actions/configure-aws-credentials@v3
        with:
          role-to-assume: ${{ inputs.aws-role-to-assume }}
@ -107,14 +106,12 @@ jobs:

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
          docker-image-name: ${{ inputs.docker-image }}

      - name: Use following to pull public copy of the image
        id: print-ghcr-mirror
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        env:
          ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
        shell: bash
@ -123,8 +120,7 @@ jobs:
          echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -135,7 +131,7 @@ jobs:

      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        id: install-nvidia-driver
-        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+        uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.6
        if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

      - name: Setup GPU_FLAG for docker run
@ -170,7 +166,6 @@ jobs:
        with:
          name: ${{ inputs.build-environment }}
          s3-bucket: ${{ inputs.s3-bucket }}
-          use-gha: ${{ inputs.use-gha }}

      - name: Download TD artifacts
        continue-on-error: true
@ -235,10 +230,7 @@ jobs:
          TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
-          # Use the build environment here to make sure that all build jobs in the same environment
-          # will share the same cache regardless of which workflow they belong. This should improve
-          # the cache usage for jobs in non-pull workflows like periodic, slow, or inductor
-          SCCACHE_S3_KEY_PREFIX: ${{ inputs.build-environment || github.workflow }}
+          SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
          SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
          DOCKER_IMAGE: ${{ inputs.docker-image }}
          XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
@ -261,27 +253,9 @@ jobs:
            TEST_COMMAND=.ci/pytorch/test.sh
          fi

-          # Leaving 1GB for the runner and other things
-          TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
-          # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
-          # comes from https://github.com/pytorch/test-infra/pull/6058
-          TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3))
-
-          if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
-            SHM_OPTS=
-            JENKINS_USER=
-
-            # since some steps are skipped on s390x, if they are necessary, run them here
-            env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
-            env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
-          else
-            SHM_OPTS="--shm-size=${SHM_SIZE}"
-            JENKINS_USER="--user jenkins"
-          fi
-
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
-          # Used for GPU_FLAG, SHM_OPTS and JENKINS_USER since that doesn't play nice
+          # Used for GPU_FLAG since that doesn't play nice
          # shellcheck disable=SC2086,SC2090
          container_name=$(docker run \
            ${GPU_FLAG:-} \
@ -327,17 +301,15 @@ jobs:
            -e DASHBOARD_TAG \
            -e IS_A100_RUNNER \
            -e ARTIFACTS_FILE_SUFFIX \
-            --memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
-            --memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --ipc=host \
-            ${SHM_OPTS} \
+            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
            --name="${container_name}" \
-            ${JENKINS_USER} \
+            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
@ -345,11 +317,6 @@ jobs:
          # Propagate download.pytorch.org IP to container
          grep download.pytorch.org /etc/hosts | docker exec -i "${container_name}" sudo bash -c "/bin/cat >> /etc/hosts"
          echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
-
-          if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
-            docker exec -t "${container_name}" sh -c "python3 -m pip install -r .ci/docker/requirements-ci.txt"
-          fi
-
          docker exec -t "${container_name}" sh -c "python3 -m pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"

      - name: Upload pytest cache if tests failed
@ -364,7 +331,7 @@ jobs:
          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

      - name: Upload the benchmark results
-        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.6
        with:
          benchmark-results-dir: test/test-reports
          dry-run: false
@ -410,7 +377,7 @@ jobs:
          path: ./**/core.[1-9]*

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6
        if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'

      # NB: We are currently having an intermittent GPU-related issue on G5 runners with
@ -489,12 +456,3 @@ jobs:
            echo "NVIDIA driver detects $GPU_COUNT GPUs. The runner has a broken GPU, shutting it down..."
            .github/scripts/stop_runner_service.sh
          fi
-
-      - name: Cleanup docker
-        if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
-        shell: bash
-        run: |
-          # on s390x stop the container for clean worker stop
-          # ignore expansion of "docker ps -q" since it could be empty
-          # shellcheck disable=SC2046
-          docker stop $(docker ps -q) || true
--- a/.github/workflows/_mac-build.yml
+++ b/.github/workflows/_mac-build.yml
@ -71,11 +71,11 @@ jobs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
    steps:
      - name: Clean up disk space before running MacOS workflow
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.6

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6

      - name: Set xcode version
        env:
@ -87,7 +87,7 @@ jobs:

      - name: Setup miniconda
        if: inputs.environment-file == ''
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.6
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -97,7 +97,7 @@ jobs:
      # environment even though the arch is x86-64
      - name: Setup miniconda using the provided environment file
        if: inputs.environment-file != ''
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.6
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: ${{ inputs.environment-file }}
@ -207,4 +207,4 @@ jobs:
      - name: Clean up disk space
        if: always()
        continue-on-error: true
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.6
--- a/.github/workflows/_mac-test-mps.yml
+++ b/.github/workflows/_mac-test-mps.yml
@ -41,7 +41,7 @@ jobs:
      reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false

@ -66,10 +66,10 @@ jobs:
          sysctl machdep.cpu.brand_string kern.osproductversion

      - name: Checkout PyTorch
-        uses: actions/checkout@v4
+        uses: malfet/checkout@silent-checkout
        with:
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-          show-progress: false
+          quiet-checkout: true

      - name: Clean checkout
        run: |
@ -82,7 +82,7 @@ jobs:
          use-gha: true

      - name: Setup miniconda
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.6
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -152,7 +152,6 @@ jobs:
          set -e

          ${CONDA_RUN} python3 test/run_test.py --mps --verbose
-          MTL_CAPTURE_ENABLED=1 ${CONDA_RUN} python3 test/test_mps.py --verbose -k test_metal_capture

      - name: Print remaining test logs
        shell: bash
@ -170,4 +169,4 @@ jobs:
      - name: Clean up disk space
        if: always()
        continue-on-error: true
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.6
--- a/.github/workflows/_mac-test.yml
+++ b/.github/workflows/_mac-test.yml
@ -82,11 +82,11 @@ jobs:
          done

      - name: Clean up disk space before running MacOS workflow
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.6

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6

      - name: Start monitoring script
        id: monitor-script
@ -109,7 +109,7 @@ jobs:
          use-gha: true

      - name: Setup miniconda
-        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
+        uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.6
        with:
          python-version: ${{ inputs.python-version }}
          environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -224,7 +224,7 @@ jobs:
          file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}

      - name: Upload the benchmark results
-        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@release/2.6
        with:
          benchmark-results-dir: test/test-reports
          dry-run: false
@ -234,4 +234,4 @@ jobs:
      - name: Clean up disk space
        if: always()
        continue-on-error: true
-        uses: pytorch/test-infra/.github/actions/check-disk-space@main
+        uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.6
--- a/.github/workflows/_rocm-test.yml
+++ b/.github/workflows/_rocm-test.yml
@ -66,7 +66,7 @@ jobs:
    steps:
      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: true

@ -88,12 +88,12 @@ jobs:

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
          docker-image-name: ${{ inputs.docker-image }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

@ -170,7 +170,9 @@ jobs:
          SHARD_NUMBER: ${{ matrix.shard }}
          NUM_TEST_SHARDS: ${{ matrix.num_shards }}
          REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
+          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          DOCKER_IMAGE: ${{ inputs.docker-image }}
+          XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
          TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
@ -217,10 +219,12 @@ jobs:
            -e NO_TEST_TIMEOUT \
            -e NO_TD \
            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
            -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
            -e TESTS_TO_INCLUDE \
-            --env-file="${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}" \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --ulimit stack=10485760:83886080 \
            --ulimit core=0 \
            --security-opt seccomp=unconfined \
--- a/.github/workflows/_runner-determinator.yml
+++ b/.github/workflows/_runner-determinator.yml
@ -54,7 +54,7 @@ jobs:
      PR_NUMBER: ${{ github.event.pull_request.number }}
    steps:
      # - name: Checkout PyTorch
-      #   uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+      #   uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
      #   with:
      #     fetch-depth: 1
      #     submodules: true
--- a/.github/workflows/_win-build.yml
+++ b/.github/workflows/_win-build.yml
@ -84,10 +84,10 @@ jobs:
          git config --global core.fsmonitor false

      - name: Clean up leftover processes on non-ephemeral Windows runner
-        uses: pytorch/test-infra/.github/actions/cleanup-runner@main
+        uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.6

      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
@ -102,7 +102,7 @@ jobs:

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: true

--- a/.github/workflows/_win-test.yml
+++ b/.github/workflows/_win-test.yml
@ -66,10 +66,10 @@ jobs:
          git config --global core.fsmonitor false

      - name: Clean up leftover processes on non-ephemeral Windows runner
-        uses: pytorch/test-infra/.github/actions/cleanup-runner@main
+        uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.6

      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
@ -85,7 +85,7 @@ jobs:

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          no-sudo: true

--- a/.github/workflows/_xpu-test.yml
+++ b/.github/workflows/_xpu-test.yml
@ -62,7 +62,7 @@ jobs:
    steps:
      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6

      - name: Setup XPU
        uses: ./.github/actions/setup-xpu
@ -80,12 +80,12 @@ jobs:

      - name: Calculate docker image
        id: calculate-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
          docker-image-name: ${{ inputs.docker-image }}

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

--- a/.github/workflows/build-almalinux-images.yml
+++ b/.github/workflows/build-almalinux-images.yml
@ -41,12 +41,12 @@ jobs:
      CUDA_VERSION: ${{ matrix.cuda_version }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: almalinux-builder${{ matrix.cuda_version == 'cpu' && '-' || '-cuda' }}${{matrix.cuda_version}}
            docker-build-dir:  .ci/docker/almalinux
--- a/.github/workflows/build-libtorch-images.yml
+++ b/.github/workflows/build-libtorch-images.yml
@ -32,7 +32,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -51,12 +51,12 @@ jobs:
      GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: libtorch-cxx11-builder-cuda${{matrix.cuda_version}}
            docker-build-dir:  .ci/docker/libtorch
@ -87,18 +87,18 @@ jobs:
    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
    strategy:
      matrix:
-        rocm_version: ["6.2.4", "6.3"]
+        rocm_version: ["6.1", "6.2.4"]
    env:
      GPU_ARCH_TYPE: rocm
      GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: libtorch-cxx11-builder-rocm${{matrix.rocm_version}}
            docker-build-dir:  .ci/docker/libtorch
@ -129,12 +129,12 @@ jobs:
    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: libtorch-cxx11-builder-cpu
            docker-build-dir:  .ci/docker/libtorch
--- a/.github/workflows/build-magma-windows.yml
+++ b/.github/workflows/build-magma-windows.yml
@ -28,7 +28,7 @@ jobs:
      CUDA_VERSION: ${{ matrix.cuda_version }}
      CONFIG: ${{ matrix.config }}
    steps:
-      - name: Checkout pytorch/pytorch
+      - name: Checkout pytorch/builder
        uses: actions/checkout@v4
      - name: Enable MSVC dev commands to enable cl.exe  # FYI incompatible with shell: bash
        uses: ilammy/msvc-dev-cmd@dd5e2fa0a7de1e7929605d9ecc020e749d9856a3
--- a/.github/workflows/build-manywheel-images-s390x.yml
+++ b/.github/workflows/build-manywheel-images-s390x.yml
@ -41,7 +41,7 @@ jobs:
      GPU_ARCH_TYPE: cpu-s390x
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
          no-sudo: true
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@ -36,7 +36,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -57,12 +57,12 @@ jobs:
      - name: Purge tools folder (free space for build)
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinux-builder-cuda${{matrix.cuda_version}}
            docker-build-dir:  .ci/docker/manywheel
@ -102,12 +102,12 @@ jobs:
      - name: Purge tools folder (free space for build)
        run: rm -rf /opt/hostedtoolcache
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinux2_28-builder-cuda${{matrix.cuda_version}}
            docker-build-dir:  .ci/docker/manywheel
@ -147,7 +147,7 @@ jobs:
        uses: actions/checkout@v3
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinuxaarch64-builder-cuda${{matrix.cuda_version}}
            docker-build-dir:  .ci/docker/manywheel
@ -178,18 +178,18 @@ jobs:
    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
    strategy:
      matrix:
-        rocm_version: ["6.2.4", "6.3"]
+        rocm_version: ["6.1", "6.2.4"]
    env:
      GPU_ARCH_TYPE: rocm-manylinux_2_28
      GPU_ARCH_VERSION: ${{ matrix.rocm_version }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinux2_28-builder-rocm${{matrix.rocm_version}}
            docker-build-dir:  .ci/docker/manywheel
@ -220,12 +220,12 @@ jobs:
    runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinux-builder-cpu
            docker-build-dir:  .ci/docker/manywheel
@ -258,12 +258,12 @@ jobs:
      GPU_ARCH_TYPE: cpu-manylinux_2_28
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinux2_28-builder-cpu
            docker-build-dir:  .ci/docker/manywheel
@ -296,12 +296,12 @@ jobs:
      GPU_ARCH_TYPE: cpu-aarch64
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinuxaarch64-builder-cpu-aarch64
            docker-build-dir:  .ci/docker/manywheel
@ -334,12 +334,12 @@ jobs:
      GPU_ARCH_TYPE: cpu-aarch64-2_28
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinux2_28_aarch64-builder-cpu-aarch64
            docker-build-dir:  .ci/docker/manywheel
@ -375,12 +375,12 @@ jobs:
      GPU_ARCH_TYPE: cpu-cxx11-abi
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinuxcxx11-abi-builder-cpu-cxx11-abi
            docker-build-dir:  .ci/docker/manywheel
@ -413,12 +413,12 @@ jobs:
      GPU_ARCH_TYPE: xpu
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
      - name: Calculate docker image
        if: env.WITH_PUSH == 'false'
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
            docker-image-name: manylinux2_28-builder-xpu
            docker-build-dir:  .ci/docker/manywheel
--- a/.github/workflows/build-triton-wheel.yml
+++ b/.github/workflows/build-triton-wheel.yml
@ -3,7 +3,7 @@ name: Build Triton wheels
 on:
  push:
    branches:
-      - main
+      - release/2.6
    tags:
      # NOTE: Binary build pipelines should only get triggered on release candidate builds
      # Release candidate tags look like: v1.11.0-rc1
@ -30,7 +30,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -54,7 +54,7 @@ jobs:
            docker-image: "pytorch/manylinux2_28-builder:cpu"
        include:
          - device: "rocm"
-            rocm_version: "6.3"
+            rocm_version: "6.2.4"
          - device: "cuda"
            rocm_version: ""
    timeout-minutes: 40
@ -65,12 +65,12 @@ jobs:
      PLATFORM: ${{ contains(matrix.docker-image, '2_28') && 'manylinux_2_28_x86_64' || 'manylinux2014_x86_64' }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false

@ -78,7 +78,7 @@ jobs:
        uses: ./.github/actions/setup-linux

      - name: Pull Docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ env.DOCKER_IMAGE }}

@ -157,7 +157,7 @@ jobs:
          path: ${{ runner.temp }}/artifacts/wheelhouse/*

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6
        if: always()

  upload-wheel:
--- a/.github/workflows/check-labels.yml
+++ b/.github/workflows/check-labels.yml
@ -38,7 +38,7 @@ jobs:
    runs-on: linux.20_04.4x
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
          fetch-depth: 1
--- a/.github/workflows/close-nonexistent-disable-issues.yml
+++ b/.github/workflows/close-nonexistent-disable-issues.yml
@ -11,7 +11,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          submodules: false
          fetch-depth: 1
--- a/.github/workflows/create_release.yml
+++ b/.github/workflows/create_release.yml
@ -19,7 +19,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -36,9 +36,8 @@ jobs:
    outputs:
      pt_release_name: ${{ steps.release_name.outputs.pt_release_name }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: malfet/checkout@silent-checkout
        with:
-          show-progress: false
          submodules: 'recursive'
          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      - name: Fake name for PRs
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@ -33,7 +33,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -99,21 +99,21 @@ jobs:
      # [see note: pytorch repo ref]
      # deep clone (fetch-depth 0) required for git merge-base
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      - name: Build docker image
        id: build-docker-image
-        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.6
        with:
          docker-image-name: ${{ matrix.docker-image-name }}
          always-rebuild: true
          push: true

      - name: Pull docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
          docker-image: ${{ steps.build-docker-image.outputs.docker-image }}

@ -145,5 +145,5 @@ jobs:
        if: always()

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6
        if: always()
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@ -37,7 +37,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -52,7 +52,7 @@ jobs:
      matrix: ${{ steps.generate-matrix.outputs.matrix }}
    steps:
      - name: Checkout PyTorch
-        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.6
        with:
          fetch-depth: 1
          submodules: true
@ -82,7 +82,7 @@ jobs:
      CUDNN_VERSION: ${{ matrix.cudnn_version }}
    steps:
      - name: Setup SSH (Click me for login details)
-        uses: pytorch/test-infra/.github/actions/setup-ssh@main
+        uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.6
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
      # [see note: pytorch repo ref]
@ -160,12 +160,12 @@ jobs:
          fi

      - name: Teardown Linux
-        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.6
        if: always()

  validate:
    needs: build
-    uses: pytorch/test-infra/.github/workflows/validate-docker-images.yml@main
+    uses: pytorch/test-infra/.github/workflows/validate-docker-images.yml@release/2.6
    with:
-      channel: nightly
+      channel: test
      ref: main
--- a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
+++ b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
@ -25,6 +25,7 @@ env:
  AWS_DEFAULT_REGION: us-east-1
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-aarch64-binary-manywheel
+  BUILDER_ROOT: /builder
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
@ -39,7 +40,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -51,12 +52,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.9"
@ -76,12 +78,13 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.9"
@ -100,12 +103,13 @@ jobs:
    needs: manywheel-py3_9-cpu-aarch64-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.9"
@ -122,12 +126,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.9"
@ -147,12 +152,13 @@ jobs:
    needs: manywheel-py3_9-cuda-aarch64-build
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.9"
@ -169,12 +175,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.10"
@ -194,12 +201,13 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.10"
@ -218,12 +226,13 @@ jobs:
    needs: manywheel-py3_10-cpu-aarch64-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.10"
@ -240,12 +249,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.10"
@ -265,12 +275,13 @@ jobs:
    needs: manywheel-py3_10-cuda-aarch64-build
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.10"
@ -287,12 +298,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.11"
@ -312,12 +324,13 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.11"
@ -336,12 +349,13 @@ jobs:
    needs: manywheel-py3_11-cpu-aarch64-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.11"
@ -358,12 +372,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.11"
@ -383,12 +398,13 @@ jobs:
    needs: manywheel-py3_11-cuda-aarch64-build
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.11"
@ -405,12 +421,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.12"
@ -430,12 +447,13 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.12"
@ -454,12 +472,13 @@ jobs:
    needs: manywheel-py3_12-cpu-aarch64-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.12"
@ -476,12 +495,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.12"
@ -501,12 +521,13 @@ jobs:
    needs: manywheel-py3_12-cuda-aarch64-build
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.12"
@ -523,12 +544,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13"
@ -548,12 +570,13 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13"
@ -572,12 +595,13 @@ jobs:
    needs: manywheel-py3_13-cpu-aarch64-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu-aarch64
-      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13"
@ -594,12 +618,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13"
@ -619,12 +644,13 @@ jobs:
    needs: manywheel-py3_13-cuda-aarch64-build
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: manywheel
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_TYPE: cuda-aarch64
-      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
      DESIRED_DEVTOOLSET: cxx11-abi
      use_split_build: False
      DESIRED_PYTHON: "3.13"
@ -634,3 +660,126 @@ jobs:
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml
+
+  manywheel-py3_13t-cpu-aarch64-build:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    uses: ./.github/workflows/_binary-build-linux.yml
+    needs: get-label-type
+    with:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: cpu
+      GPU_ARCH_TYPE: cpu-aarch64
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
+      DESIRED_DEVTOOLSET: cxx11-abi
+      use_split_build: False
+      DESIRED_PYTHON: "3.13t"
+      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      runs_on: linux.arm64.m7g.4xlarge.ephemeral
+      ALPINE_IMAGE: "arm64v8/alpine"
+      build_name: manywheel-py3_13t-cpu-aarch64
+      build_environment: linux-aarch64-binary-manywheel
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+  manywheel-py3_13t-cpu-aarch64-test:  # Testing
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs:
+      - manywheel-py3_13t-cpu-aarch64-build
+      - get-label-type
+    uses: ./.github/workflows/_binary-test-linux.yml
+    with:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: cpu
+      GPU_ARCH_TYPE: cpu-aarch64
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
+      DESIRED_DEVTOOLSET: cxx11-abi
+      use_split_build: False
+      DESIRED_PYTHON: "3.13t"
+      build_name: manywheel-py3_13t-cpu-aarch64
+      build_environment: linux-aarch64-binary-manywheel
+      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      runs_on: linux.arm64.2xlarge
+      ALPINE_IMAGE: "arm64v8/alpine"
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+  manywheel-py3_13t-cpu-aarch64-upload:  # Uploading
+    if: ${{ github.repository_owner == 'pytorch' }}
+    permissions:
+      id-token: write
+      contents: read
+    needs: manywheel-py3_13t-cpu-aarch64-test
+    with:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: cpu
+      GPU_ARCH_TYPE: cpu-aarch64
+      DOCKER_IMAGE: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.6
+      DESIRED_DEVTOOLSET: cxx11-abi
+      use_split_build: False
+      DESIRED_PYTHON: "3.13t"
+      build_name: manywheel-py3_13t-cpu-aarch64
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
+    uses: ./.github/workflows/_binary-upload.yml
+
+  manywheel-py3_13t-cuda-aarch64-build:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    uses: ./.github/workflows/_binary-build-linux.yml
+    needs: get-label-type
+    with:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: cu126
+      GPU_ARCH_TYPE: cuda-aarch64
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
+      DESIRED_DEVTOOLSET: cxx11-abi
+      use_split_build: False
+      DESIRED_PYTHON: "3.13t"
+      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      runs_on: linux.arm64.m7g.4xlarge.ephemeral
+      ALPINE_IMAGE: "arm64v8/alpine"
+      build_name: manywheel-py3_13t-cuda-aarch64
+      build_environment: linux-aarch64-binary-manywheel
+      timeout-minutes: 420
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+  manywheel-py3_13t-cuda-aarch64-upload:  # Uploading
+    if: ${{ github.repository_owner == 'pytorch' }}
+    permissions:
+      id-token: write
+      contents: read
+    needs: manywheel-py3_13t-cuda-aarch64-build
+    with:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: manywheel
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: cu126
+      GPU_ARCH_TYPE: cuda-aarch64
+      DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.6-2.6
+      DESIRED_DEVTOOLSET: cxx11-abi
+      use_split_build: False
+      DESIRED_PYTHON: "3.13t"
+      build_name: manywheel-py3_13t-cuda-aarch64
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
+    uses: ./.github/workflows/_binary-upload.yml
--- a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-main.yml
+++ b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-main.yml
@ -20,6 +20,7 @@ env:
  AWS_DEFAULT_REGION: us-east-1
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-binary-libtorch-cxx11-abi
+  BUILDER_ROOT: /builder
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
@ -34,7 +35,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -46,12 +47,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
@ -67,12 +69,13 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cpu-shared-with-deps-cxx11-abi
--- a/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml
+++ b/.github/workflows/generated-linux-binary-libtorch-cxx11-abi-nightly.yml
@ -25,6 +25,7 @@ env:
  AWS_DEFAULT_REGION: us-east-1
  BINARY_ENV_FILE: /tmp/env
  BUILD_ENVIRONMENT: linux-binary-libtorch-cxx11-abi
+  BUILDER_ROOT: /builder
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PR_NUMBER: ${{ github.event.pull_request.number }}
  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
@ -39,7 +40,7 @@ jobs:
  get-label-type:
    if: github.repository_owner == 'pytorch'
    name: get-label-type
-    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main
+    uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@release/2.6
    with:
      triggering_actor: ${{ github.triggering_actor }}
      issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
@ -51,12 +52,13 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
@ -72,12 +74,13 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cpu-shared-with-deps-cxx11-abi
@ -94,12 +97,13 @@ jobs:
    needs: libtorch-cpu-shared-with-deps-cxx11-abi-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cpu
      GPU_ARCH_TYPE: cpu
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cpu-shared-with-deps-cxx11-abi
@ -115,13 +119,14 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu118
      GPU_ARCH_VERSION: 11.8
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
@ -137,13 +142,14 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu118
      GPU_ARCH_VERSION: 11.8
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cuda11_8-shared-with-deps-cxx11-abi
@ -160,13 +166,14 @@ jobs:
    needs: libtorch-cuda11_8-shared-with-deps-cxx11-abi-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu118
      GPU_ARCH_VERSION: 11.8
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cuda11_8-shared-with-deps-cxx11-abi
@ -182,13 +189,14 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
@ -204,13 +212,14 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cuda12_4-shared-with-deps-cxx11-abi
@ -227,13 +236,14 @@ jobs:
    needs: libtorch-cuda12_4-shared-with-deps-cxx11-abi-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu124
      GPU_ARCH_VERSION: 12.4
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cuda12_4-shared-with-deps-cxx11-abi
@ -249,13 +259,14 @@ jobs:
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_VERSION: 12.6
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.6-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
@ -271,13 +282,14 @@ jobs:
    uses: ./.github/workflows/_binary-test-linux.yml
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_VERSION: 12.6
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.6-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cuda12_6-shared-with-deps-cxx11-abi
@ -294,13 +306,14 @@ jobs:
    needs: libtorch-cuda12_6-shared-with-deps-cxx11-abi-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: cu126
      GPU_ARCH_VERSION: 12.6
      GPU_ARCH_TYPE: cuda
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.6-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.6-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-cuda12_6-shared-with-deps-cxx11-abi
@ -310,19 +323,116 @@ jobs:
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml

+  libtorch-rocm6_1-shared-with-deps-cxx11-abi-build:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    uses: ./.github/workflows/_binary-build-linux.yml
+    needs: get-label-type
+    with:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: libtorch
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm6.1
+      GPU_ARCH_VERSION: 6.1
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-2.6
+      LIBTORCH_VARIANT: shared-with-deps
+      DESIRED_DEVTOOLSET: cxx11-abi
+      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
+      build_name: libtorch-rocm6_1-shared-with-deps-cxx11-abi
+      build_environment: linux-binary-libtorch-cxx11-abi
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+  libtorch-rocm6_1-shared-with-deps-cxx11-abi-test:  # Testing
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs:
+      - libtorch-rocm6_1-shared-with-deps-cxx11-abi-build
+      - get-label-type
+    runs-on: linux.rocm.gpu
+    timeout-minutes: 240
+    env:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: libtorch
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm6.1
+      GPU_ARCH_VERSION: 6.1
+      GPU_ARCH_TYPE: rocm
+      SKIP_ALL_TESTS: 1
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-2.6
+      LIBTORCH_VARIANT: shared-with-deps
+      DESIRED_DEVTOOLSET: cxx11-abi
+    steps:
+      - name: Setup ROCm
+        uses: ./.github/actions/setup-rocm
+      - uses: actions/download-artifact@v4.1.7
+        name: Download Build Artifacts
+        with:
+          name: libtorch-rocm6_1-shared-with-deps-cxx11-abi
+          path: "${{ runner.temp }}/artifacts/"
+      - name: Checkout PyTorch
+        uses: malfet/checkout@silent-checkout
+        with:
+          submodules: recursive
+          path: pytorch
+          quiet-checkout: true
+      - name: Clean PyTorch checkout
+        run: |
+          # Remove any artifacts from the previous checkouts
+          git clean -fxd
+        working-directory: pytorch
+      - name: ROCm set GPU_FLAG
+        run: |
+          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
+      - name: Pull Docker image
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
+        with:
+          docker-image: pytorch/libtorch-cxx11-builder:rocm6.1-2.6
+      - name: Test Pytorch binary
+        uses: ./pytorch/.github/actions/test-pytorch-binary
+      - name: Teardown ROCm
+        uses: ./.github/actions/teardown-rocm
+  libtorch-rocm6_1-shared-with-deps-cxx11-abi-upload:  # Uploading
+    if: ${{ github.repository_owner == 'pytorch' }}
+    permissions:
+      id-token: write
+      contents: read
+    needs: libtorch-rocm6_1-shared-with-deps-cxx11-abi-test
+    with:
+      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
+      PACKAGE_TYPE: libtorch
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: rocm6.1
+      GPU_ARCH_VERSION: 6.1
+      GPU_ARCH_TYPE: rocm
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-2.6
+      LIBTORCH_VARIANT: shared-with-deps
+      DESIRED_DEVTOOLSET: cxx11-abi
+      build_name: libtorch-rocm6_1-shared-with-deps-cxx11-abi
+    secrets:
+      github-token: ${{ secrets.GITHUB_TOKEN }}
+      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
+      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
+    uses: ./.github/workflows/_binary-upload.yml
+
  libtorch-rocm6_2_4-shared-with-deps-cxx11-abi-build:
    if: ${{ github.repository_owner == 'pytorch' }}
    uses: ./.github/workflows/_binary-build-linux.yml
    needs: get-label-type
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: rocm6.2.4
      GPU_ARCH_VERSION: 6.2.4
      GPU_ARCH_TYPE: rocm
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
@ -339,6 +449,7 @@ jobs:
    timeout-minutes: 240
    env:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
@ -346,7 +457,7 @@ jobs:
      GPU_ARCH_VERSION: 6.2.4
      GPU_ARCH_TYPE: rocm
      SKIP_ALL_TESTS: 1
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
    steps:
@ -358,12 +469,11 @@ jobs:
          name: libtorch-rocm6_2_4-shared-with-deps-cxx11-abi
          path: "${{ runner.temp }}/artifacts/"
      - name: Checkout PyTorch
-        uses: actions/checkout@v4
+        uses: malfet/checkout@silent-checkout
        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
          submodules: recursive
          path: pytorch
-          show-progress: false
+          quiet-checkout: true
      - name: Clean PyTorch checkout
        run: |
          # Remove any artifacts from the previous checkouts
@ -373,9 +483,9 @@ jobs:
        run: |
          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
      - name: Pull Docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.6
        with:
-          docker-image: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+          docker-image: pytorch/libtorch-cxx11-builder:rocm6.2.4-2.6
      - name: Test Pytorch binary
        uses: ./pytorch/.github/actions/test-pytorch-binary
      - name: Teardown ROCm
@ -388,13 +498,14 @@ jobs:
    needs: libtorch-rocm6_2_4-shared-with-deps-cxx11-abi-test
    with:
      PYTORCH_ROOT: /pytorch
+      BUILDER_ROOT: /builder
      PACKAGE_TYPE: libtorch
      # TODO: This is a legacy variable that we eventually want to get rid of in
      #       favor of GPU_ARCH_VERSION
      DESIRED_CUDA: rocm6.2.4
      GPU_ARCH_VERSION: 6.2.4
      GPU_ARCH_TYPE: rocm
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-main
+      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.2.4-2.6
      LIBTORCH_VARIANT: shared-with-deps
      DESIRED_DEVTOOLSET: cxx11-abi
      build_name: libtorch-rocm6_2_4-shared-with-deps-cxx11-abi
@ -403,97 +514,3 @@ jobs:
      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
    uses: ./.github/workflows/_binary-upload.yml
-
-  libtorch-rocm6_3-shared-with-deps-cxx11-abi-build:
-    if: ${{ github.repository_owner == 'pytorch' }}
-    uses: ./.github/workflows/_binary-build-linux.yml
-    needs: get-label-type
-    with:
-      PYTORCH_ROOT: /pytorch
-      PACKAGE_TYPE: libtorch
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: rocm6.3
-      GPU_ARCH_VERSION: 6.3
-      GPU_ARCH_TYPE: rocm
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.3-main
-      LIBTORCH_VARIANT: shared-with-deps
-      DESIRED_DEVTOOLSET: cxx11-abi
-      runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
-      build_name: libtorch-rocm6_3-shared-with-deps-cxx11-abi
-      build_environment: linux-binary-libtorch-cxx11-abi
-    secrets:
-      github-token: ${{ secrets.GITHUB_TOKEN }}
-  libtorch-rocm6_3-shared-with-deps-cxx11-abi-test:  # Testing
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs:
-      - libtorch-rocm6_3-shared-with-deps-cxx11-abi-build
-      - get-label-type
-    runs-on: linux.rocm.gpu
-    timeout-minutes: 240
-    env:
-      PYTORCH_ROOT: /pytorch
-      PACKAGE_TYPE: libtorch
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: rocm6.3
-      GPU_ARCH_VERSION: 6.3
-      GPU_ARCH_TYPE: rocm
-      SKIP_ALL_TESTS: 1
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.3-main
-      LIBTORCH_VARIANT: shared-with-deps
-      DESIRED_DEVTOOLSET: cxx11-abi
-    steps:
-      - name: Setup ROCm
-        uses: ./.github/actions/setup-rocm
-      - uses: actions/download-artifact@v4.1.7
-        name: Download Build Artifacts
-        with:
-          name: libtorch-rocm6_3-shared-with-deps-cxx11-abi
-          path: "${{ runner.temp }}/artifacts/"
-      - name: Checkout PyTorch
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-          submodules: recursive
-          path: pytorch
-          show-progress: false
-      - name: Clean PyTorch checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-        working-directory: pytorch
-      - name: ROCm set GPU_FLAG
-        run: |
-          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
-      - name: Pull Docker image
-        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
-        with:
-          docker-image: pytorch/libtorch-cxx11-builder:rocm6.3-main
-      - name: Test Pytorch binary
-        uses: ./pytorch/.github/actions/test-pytorch-binary
-      - name: Teardown ROCm
-        uses: ./.github/actions/teardown-rocm
-  libtorch-rocm6_3-shared-with-deps-cxx11-abi-upload:  # Uploading
-    if: ${{ github.repository_owner == 'pytorch' }}
-    permissions:
-      id-token: write
-      contents: read
-    needs: libtorch-rocm6_3-shared-with-deps-cxx11-abi-test
-    with:
-      PYTORCH_ROOT: /pytorch
-      PACKAGE_TYPE: libtorch
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: rocm6.3
-      GPU_ARCH_VERSION: 6.3
-      GPU_ARCH_TYPE: rocm
-      DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.3-main
-      LIBTORCH_VARIANT: shared-with-deps
-      DESIRED_DEVTOOLSET: cxx11-abi
-      build_name: libtorch-rocm6_3-shared-with-deps-cxx11-abi
-    secrets:
-      github-token: ${{ secrets.GITHUB_TOKEN }}
-      conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
-      conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
-    uses: ./.github/workflows/_binary-upload.yml
--- a/Show More
+++ b/Show More