Compare commits


1 commit

SHA1: 7fd3a2cf43
Message: [Dynamo] Support for proxying frozen dataclasses
ghstack-source-id: fb6556cd2f9424fe223147471fe95126441954d9
Pull Request resolved: https://github.com/pytorch/pytorch/pull/134846
Date: 2024-09-01 13:30:12 -07:00
1649 changed files with 42819 additions and 56280 deletions
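For context on the feature named in the commit title: a frozen dataclass is an immutable dataclass(frozen=True) container, and "proxying" one means Dynamo can model its construction and field reads at trace time instead of falling back to eager. The snippet below is a minimal illustrative sketch of that usage pattern, not code from this diff; the PairConfig class and apply function are invented for illustration.

from dataclasses import dataclass

import torch


@dataclass(frozen=True)  # immutable: fields cannot be reassigned after construction
class PairConfig:
    scale: float
    bias: float


def apply(cfg: PairConfig, x: torch.Tensor) -> torch.Tensor:
    # Dynamo has to proxy cfg (trace its construction and attribute reads)
    # to compile this function without a graph break.
    return x * cfg.scale + cfg.bias


compiled = torch.compile(apply, fullgraph=True)
out = compiled(PairConfig(scale=2.0, bias=1.0), torch.randn(4))

Without such support, passing an object like this through compiled code could force a graph break; with it, reads such as cfg.scale can be folded into the traced graph.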

View File

@@ -1,5 +1,5 @@
-0.7b
+0.6b
 manylinux_2_17
 rocm6.2
-9be04068c3c0857a4cfd17d7e39e71d0423ebac2
-3e9e1959d23b93d78a08fcc5f868125dc3854dece32fd9458be9ef4467982291
+7f07e8a1cb1f99627eb6d77f5c0e9295c775f3c7
+e4ab195d2bd19e939c675a13280c29714c6ef9f2cf420690da150fa0cac043b1

View File

@@ -236,7 +236,7 @@ case "$image" in
     TRITON=yes
     ;;
   pytorch-linux-focal-py3-clang10-onnx)
-    ANACONDA_PYTHON_VERSION=3.9
+    ANACONDA_PYTHON_VERSION=3.8
    CLANG_VERSION=10
     PROTOBUF=yes
     DB=yes
@@ -245,7 +245,7 @@ case "$image" in
     ONNX=yes
     ;;
   pytorch-linux-focal-py3-clang9-android-ndk-r21e)
-    ANACONDA_PYTHON_VERSION=3.9
+    ANACONDA_PYTHON_VERSION=3.8
     CLANG_VERSION=9
     LLVMDEV=yes
     PROTOBUF=yes
@@ -254,8 +254,8 @@ case "$image" in
     GRADLE_VERSION=6.8.3
     NINJA_VERSION=1.9.0
     ;;
-  pytorch-linux-focal-py3.9-clang10)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-focal-py3.8-clang10)
+    ANACONDA_PYTHON_VERSION=3.8
     CLANG_VERSION=10
     PROTOBUF=yes
     DB=yes
@@ -276,8 +276,8 @@ case "$image" in
     CONDA_CMAKE=yes
     TRITON=yes
     ;;
-  pytorch-linux-focal-py3.9-gcc9)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-focal-py3.8-gcc9)
+    ANACONDA_PYTHON_VERSION=3.8
     GCC_VERSION=9
     PROTOBUF=yes
     DB=yes
@@ -286,23 +286,23 @@ case "$image" in
     TRITON=yes
     ;;
   pytorch-linux-focal-rocm-n-1-py3)
-    ANACONDA_PYTHON_VERSION=3.10
+    ANACONDA_PYTHON_VERSION=3.8
     GCC_VERSION=9
     PROTOBUF=yes
     DB=yes
     VISION=yes
-    ROCM_VERSION=6.1
+    ROCM_VERSION=6.0
     NINJA_VERSION=1.9.0
     CONDA_CMAKE=yes
     TRITON=yes
     ;;
   pytorch-linux-focal-rocm-n-py3)
-    ANACONDA_PYTHON_VERSION=3.10
+    ANACONDA_PYTHON_VERSION=3.8
     GCC_VERSION=9
     PROTOBUF=yes
     DB=yes
     VISION=yes
-    ROCM_VERSION=6.2
+    ROCM_VERSION=6.1
     NINJA_VERSION=1.9.0
     CONDA_CMAKE=yes
     TRITON=yes
@@ -318,8 +318,8 @@ case "$image" in
     CONDA_CMAKE=yes
     TRITON=yes
     ;;
-  pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks)
+    ANACONDA_PYTHON_VERSION=3.8
     GCC_VERSION=11
     PROTOBUF=yes
     DB=yes
@@ -330,8 +330,8 @@ case "$image" in
     DOCS=yes
     INDUCTOR_BENCHMARKS=yes
     ;;
-  pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-clang12)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-jammy-cuda11.8-cudnn9-py3.8-clang12)
+    ANACONDA_PYTHON_VERSION=3.8
     CUDA_VERSION=11.8
     CUDNN_VERSION=9
     CLANG_VERSION=12
@@ -355,8 +355,8 @@ case "$image" in
     CONDA_CMAKE=yes
     VISION=yes
     ;;
-  pytorch-linux-jammy-py3.9-gcc11)
-    ANACONDA_PYTHON_VERSION=3.9
+  pytorch-linux-jammy-py3.8-gcc11)
+    ANACONDA_PYTHON_VERSION=3.8
     GCC_VERSION=11
     PROTOBUF=yes
     DB=yes
@@ -379,7 +379,6 @@ case "$image" in
     GCC_VERSION=11
     CONDA_CMAKE=yes
     HALIDE=yes
-    TRITON=yes
     ;;
   pytorch-linux-focal-linter)
     # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.

View File

@@ -108,10 +108,10 @@ ENV CMAKE_C_COMPILER cc
 ENV CMAKE_CXX_COMPILER c++
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton.txt triton.txt
+COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt
 COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
+RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt
 
 # Install AOTriton (Early fail)
 COPY ./aotriton_version.txt aotriton_version.txt

View File

@@ -1 +1 @@
-cd1c833b079adb324871dcbbe75b43d42ffc0ade
+69472e5c43481324ad923ceb29392ab72830acee

View File

@@ -1 +1 @@
-461c12871f336fe6f57b55d6a297f13ef209161b
+340136fec6d3ebc73e7a19eba1663e9b0ba8ab2d

View File

@@ -0,0 +1 @@
+21eae954efa5bf584da70324b640288c3ee7aede

View File

@@ -1 +1 @@
-91b14bf5593cf58a8541f3e6b9125600a867d4ef
+1b2f15840e0d70eec50d84c7a0575cb835524def

View File

@@ -1 +1 @@
-5fe38ffd73c2ac6ed6323b554205186696631c6f
+dedb7bdf339a3546896d4820366ca562c586bfa0

View File

@@ -4,12 +4,12 @@ set -ex
 
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 
-TARBALL='aotriton.tar.gz'
+TARBALL='aotriton.tar.bz2'
 # This read command alwasy returns with exit code 1
 read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true
 ARCH=$(uname -m)
 AOTRITON_INSTALL_PREFIX="$1"
-AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.gz"
+AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.bz2"
 
 cd "${AOTRITON_INSTALL_PREFIX}"
 # Must use -L to follow redirects

View File

@@ -7,7 +7,7 @@ PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/hea
 GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
 
 # Python versions to be installed in /opt/$VERSION_NO
-CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"}
+CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0"}
 
 function check_var {
     if [ -z "$1" ]; then
@@ -22,13 +22,6 @@ function do_cpython_build {
     check_var $py_ver
     check_var $py_folder
     tar -xzf Python-$py_ver.tgz
-
-    local additional_flags=""
-    if [ "$py_ver" == "3.13.0t" ]; then
-        additional_flags=" --disable-gil"
-        mv cpython-3.13/ cpython-3.13t/
-    fi
-
     pushd $py_folder
 
     local prefix="/opt/_internal/cpython-${py_ver}"
@@ -44,10 +37,8 @@ function do_cpython_build {
         local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto"
     fi
 
     # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6
-    CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} ${additional_flags} > /dev/null
+    CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} > /dev/null
 
     make -j40 > /dev/null
     make install > /dev/null
@@ -78,14 +69,7 @@ function build_cpython {
     check_var $py_ver
     check_var $PYTHON_DOWNLOAD_URL
     local py_ver_folder=$py_ver
-    if [ "$py_ver" = "3.13.0t" ]; then
-        PY_VER_SHORT="3.13"
-        PYT_VER_SHORT="3.13t"
-        check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH
-        wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz
-        do_cpython_build $py_ver cpython-$PYT_VER_SHORT
-    elif [ "$py_ver" = "3.13.0" ]; then
+    if [ "$py_ver" = "3.13.0" ]; then
         PY_VER_SHORT="3.13"
         check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH
         wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz

View File

@@ -5,7 +5,7 @@ set -ex
 # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
 mkdir tmp_cusparselt && cd tmp_cusparselt
 
-if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
+if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-4]$ ]]; then
     arch_path='sbsa'
     export TARGETARCH=${TARGETARCH:-$(uname -m)}
     if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then

View File

@@ -10,21 +10,6 @@ if [[ -z $ROCM_VERSION ]]; then
     exit 1;
 fi
 
-IS_UBUNTU=0
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-case "$ID" in
-  ubuntu)
-    IS_UBUNTU=1
-    ;;
-  centos)
-    IS_UBUNTU=0
-    ;;
-  *)
-    echo "Unable to determine OS..."
-    exit 1
-    ;;
-esac
-
 # To make version comparison easier, create an integer representation.
 save_IFS="$IFS"
 IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION})
@@ -72,11 +57,9 @@ MIOPEN_CMAKE_COMMON_FLAGS="
 -DMIOPEN_BUILD_DRIVER=OFF
 "
 # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
-if [[ $ROCM_INT -ge 60300 ]]; then
-    echo "ROCm 6.3+ MIOpen does not need any patches, do not build from source"
+if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
+    echo "ROCm 6.2 MIOpen does not need any patches, do not build from source"
     exit 0
-elif [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
-    MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
 elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
     echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
     exit 0
@@ -110,21 +93,12 @@ else
     exit 1
 fi
 
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  apt-get remove -y miopen-hip
-else
-  yum remove -y miopen-hip
-fi
+yum remove -y miopen-hip
 
 git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
 pushd MIOpen
 # remove .git to save disk space since CI runner was running out
 rm -rf .git
-# Don't build CK to save docker build time
-if [[ $ROCM_INT -ge 60200 ]]; then
-    sed -i '/composable_kernel/d' requirements.txt
-fi
 # Don't build MLIR to save docker build time
 # since we are disabling MLIR backend for MIOpen anyway
 if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
@@ -137,15 +111,10 @@ cmake -P install_deps.cmake --minimum
 
 # clean up since CI runner was running out of disk space
 rm -rf /tmp/*
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  apt-get autoclean && apt-get clean
-  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-else
-  yum clean all
-  rm -rf /var/cache/yum
-  rm -rf /var/lib/yum/yumdb
-  rm -rf /var/lib/yum/history
-fi
+yum clean all
+rm -rf /var/cache/yum
+rm -rf /var/lib/yum/yumdb
+rm -rf /var/lib/yum/history
 
 ## Build MIOpen
 mkdir -p build
@@ -162,11 +131,7 @@ make -j $(nproc) package
 
 # clean up since CI runner was running out of disk space
 rm -rf /usr/local/cget
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  sudo dpkg -i miopen-hip*.deb
-else
-  yum install -y miopen-*.rpm
-fi
+yum install -y miopen-*.rpm
 
 popd
 rm -rf MIOpen

View File

@@ -15,7 +15,7 @@ pip_install \
   flatbuffers==2.0 \
   mock==5.0.1 \
   ninja==1.10.2 \
-  networkx==2.5 \
+  networkx==2.0 \
   numpy==1.24.2
 
 # ONNXRuntime should be installed before installing
@@ -30,9 +30,10 @@ pip_install \
 
 pip_install coloredlogs packaging
 
-pip_install onnxruntime==1.18.1
-pip_install onnx==1.16.2
-pip_install onnxscript==0.1.0.dev20240831 --no-deps
+pip_install onnxruntime==1.18
+pip_install onnx==1.16.0
+# pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@3e869ef8ccf19b5ebd21c10d3e9c267c9a9fa729" --no-deps
+pip_install onnxscript==0.1.0.dev20240613 --no-deps
 
 # required by onnxscript
 pip_install ml_dtypes

View File

@@ -12,7 +12,10 @@ conda_reinstall() {
   as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y --force-reinstall $*
 }
 
-if [ -n "${XPU_VERSION}" ]; then
+if [ -n "${ROCM_VERSION}" ]; then
+    TRITON_REPO="https://github.com/openai/triton"
+    TRITON_TEXT_FILE="triton-rocm"
+elif [ -n "${XPU_VERSION}" ]; then
     TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
     TRITON_TEXT_FILE="triton-xpu"
 else

View File

@@ -37,12 +37,6 @@ esac
 
 (
   set -x
 
-  # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-  # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-  sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-  sudo systemctl daemon-reload
-  sudo systemctl restart docker
-
   docker build \
     --target final \
     --progress plain \

View File

@@ -10,7 +10,6 @@ ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US.UTF-8
 
 ARG DEVTOOLSET_VERSION=9
-# Note: This is required patch since CentOS have reached EOL
 # otherwise any yum install setp will fail
 RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo

View File

@@ -124,14 +124,7 @@ if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
 fi
 (
     set -x
-
-    # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-    # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-    sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-    sudo systemctl daemon-reload
-    sudo systemctl restart docker
-
     DOCKER_BUILDKIT=1 docker build \
         ${DOCKER_GPU_BUILD_ARG} \
         --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
         --target "${TARGET}" \

View File

@@ -30,14 +30,9 @@ dill==0.3.7
 #Pinned versions: 0.3.7
 #test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py
 
-expecttest==0.2.1
+expecttest==0.1.6
 #Description: method for writing tests where test framework auto populates
 # the expected output based on previous runs
-#Pinned versions: 0.2.1
-#test that import:
-
-fbscribelogger==0.1.6
-#Description: write to scribe from authenticated jobs on CI
 #Pinned versions: 0.1.6
 #test that import:
 
@@ -90,7 +85,7 @@ librosa>=0.6.2 ; python_version < "3.11"
 #Pinned versions:
 #test that import:
 
-mypy==1.11.2
+mypy==1.10.0
 # Pin MyPy version because new errors are likely to appear with each release
 #Description: linter
 #Pinned versions: 1.10.0
@@ -109,7 +104,7 @@ networkx==2.8.8
 #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
 
 numba==0.49.0 ; python_version < "3.9"
-numba==0.55.2 ; python_version == "3.9"
+numba==0.54.1 ; python_version == "3.9"
 numba==0.55.2 ; python_version == "3.10"
 #Description: Just-In-Time Compiler for Numerical Functions
 #Pinned versions: 0.54.1, 0.49.0, <=0.49.1
@@ -337,8 +332,3 @@ onnxscript==0.1.0.dev20240817
 #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
 #Pinned versions:
 #test that import:
-
-parameterized==0.8.1
-#Description: Parameterizes unittests, both the tests themselves and the entire testing class
-#Pinned versions:
-#test that import:

View File

@@ -1 +1 @@
-3.1.0
+3.0.0

View File

@@ -68,8 +68,6 @@ RUN rm install_rocm.sh
 COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh
 RUN rm install_rocm_magma.sh
-ADD ./common/install_miopen.sh install_miopen.sh
-RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
 ENV ROCM_PATH /opt/rocm
 ENV PATH /opt/rocm/bin:$PATH
 ENV PATH /opt/rocm/hcc/bin:$PATH
@@ -102,10 +100,10 @@ ARG TRITON
 # try to reach out to S3, which docker build runners don't have access
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton.txt triton.txt
+COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt
 COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
+RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt
 
 # Install AOTriton
 COPY ./aotriton_version.txt aotriton_version.txt
@@ -123,8 +121,5 @@ RUN bash ./install_cache.sh && rm install_cache.sh
 ARG BUILD_ENVIRONMENT
 ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 
-# Install LLVM dev version (Defined in the pytorch/builder github repository)
-COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
-
 USER jenkins
 CMD ["bash"]

View File

@@ -49,8 +49,13 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
 fi
 
 # Enable LLVM dependency for TensorExpr testing
-export USE_LLVM=/opt/llvm
-export LLVM_DIR=/opt/llvm/lib/cmake/llvm
+if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
+  export USE_LLVM=/opt/rocm/llvm
+  export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
+else
+  export USE_LLVM=/opt/llvm
+  export LLVM_DIR=/opt/llvm/lib/cmake/llvm
+fi
 
 if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then
   # To build test_edge_op_registration
@@ -232,7 +237,7 @@ fi
 
 # Do not change workspace permissions for ROCm CI jobs
 # as it can leave workspace with bad permissions for cancelled jobs
-if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
   # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
   WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
   cleanup_workspace() {
@@ -278,11 +283,11 @@ else
   # set only when building other architectures
   # or building non-XLA tests.
   if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
-        "$BUILD_ENVIRONMENT" != *s390x* &&
         "$BUILD_ENVIRONMENT" != *xla* ]]; then
     if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
-      # Install numpy-2.0.2 for builds which are backward compatible with 1.X
-      python -mpip install --pre numpy==2.0.2
+      # Install numpy-2.0 release candidate for builds
+      # Which should be backward compatible with Numpy-1.X
+      python -mpip install --pre numpy==2.0.0rc1
     fi
 
     WERROR=1 python setup.py clean
@@ -341,11 +346,11 @@ else
     CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
     CUSTOM_OP_TEST="$PWD/test/custom_operator"
     python --version
-    SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
+    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
     mkdir -p "$CUSTOM_OP_BUILD"
     pushd "$CUSTOM_OP_BUILD"
-    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
           -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
     make VERBOSE=1
     popd
@@ -355,10 +360,10 @@ else
     JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
     JIT_HOOK_TEST="$PWD/test/jit_hooks"
     python --version
-    SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')"
+    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
     mkdir -p "$JIT_HOOK_BUILD"
     pushd "$JIT_HOOK_BUILD"
-    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
           -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
     make VERBOSE=1
     popd
@@ -370,7 +375,7 @@ else
     python --version
     mkdir -p "$CUSTOM_BACKEND_BUILD"
     pushd "$CUSTOM_BACKEND_BUILD"
-    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
+    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \
           -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
     make VERBOSE=1
     popd
@@ -403,6 +408,6 @@ fi
 
 # snadampal: skipping it till sccache support added for aarch64
 # https://github.com/pytorch/pytorch/issues/121559
-if [[ "$BUILD_ENVIRONMENT" != *aarch64* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
+if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then
   print_sccache_stats
 fi

View File

@@ -1,4 +1,4 @@
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timedelta
 from tempfile import mkdtemp
 
 from cryptography import x509
@@ -42,10 +42,10 @@ def create_cert(path, C, ST, L, O, key):
         .issuer_name(issuer)
         .public_key(key.public_key())
         .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.now(timezone.utc))
+        .not_valid_before(datetime.utcnow())
         .not_valid_after(
             # Our certificate will be valid for 10 days
-            datetime.now(timezone.utc)
+            datetime.utcnow()
             + timedelta(days=10)
         )
         .add_extension(
@@ -88,10 +88,10 @@ def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
         .issuer_name(ca_cert.subject)
         .public_key(csr_cert.public_key())
         .serial_number(x509.random_serial_number())
-        .not_valid_before(datetime.now(timezone.utc))
+        .not_valid_before(datetime.utcnow())
         .not_valid_after(
             # Our certificate will be valid for 10 days
-            datetime.now(timezone.utc)
+            datetime.utcnow()
             + timedelta(days=10)
             # Sign our certificate with our private key
         )

View File

@@ -9,13 +9,15 @@ if [[ -n "$CONDA_ENV" ]]; then
   export PATH="$CONDA_ENV/bin":$PATH
 fi
 
-# Test that OpenMP is enabled
-pushd test
-if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
-  echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
-  exit 1
+# Test that OpenMP is enabled for non-arm64 build
+if [[ ${BUILD_ENVIRONMENT} != *arm64* ]]; then
+  pushd test
+  if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
+    echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
+    exit 1
+  fi
+  popd
 fi
-popd
 
 setup_test_python() {
   # The CircleCI worker hostname doesn't resolve to an address.
@@ -25,9 +27,8 @@ setup_test_python() {
   echo "Ninja version: $(ninja --version)"
   echo "Python version: $(which python) ($(python --version))"
 
-  # Set the limit on open file handles to 16384
-  # might help with intermittent compiler test failures
-  ulimit -n 16384
+  # Increase default limit on open file handles from 256 to 1024
+  ulimit -n 1024
 }
 
 test_python_all() {
test_python_all() { test_python_all() {

View File

@@ -375,8 +375,9 @@ test_inductor_cpp_wrapper_abi_compatible() {
   mkdir -p "$TEST_REPORTS_DIR"
 
   echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
+  # cpu stack allocation causes segfault and needs more investigation
   PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
-  python test/run_test.py --include inductor/test_cuda_cpp_wrapper inductor/test_cpu_repro
+  python test/run_test.py --include inductor/test_cuda_cpp_wrapper
 
   TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
     --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
@@ -400,9 +401,9 @@ pr_time_benchmarks() {
   TEST_REPORTS_DIR=$(pwd)/test/test-reports
   mkdir -p "$TEST_REPORTS_DIR"
 
-  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
+  PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
   echo "benchmark results on current PR: "
-  cat "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
+  cat "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt"
 
 }
@@ -595,9 +596,6 @@ test_single_dynamo_benchmark() {
 
 test_inductor_micro_benchmark() {
   TEST_REPORTS_DIR=$(pwd)/test/test-reports
-  if [[ "${TEST_CONFIG}" == *cpu* ]]; then
-    test_inductor_set_cpu_affinity
-  fi
   python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv"
 }
@@ -1382,16 +1380,14 @@ test_executorch() {
   assert_git_not_dirty
 }
 
-test_linux_aarch64() {
+test_linux_aarch64(){
   python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
-        test_transformers test_multiprocessing test_numpy_interop \
-        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
+        test_transformers test_multiprocessing test_numpy_interop --verbose
 
   # Dynamo tests
   python test/run_test.py --include dynamo/test_compile dynamo/test_backends dynamo/test_comptime dynamo/test_config \
         dynamo/test_functions dynamo/test_fx_passes_pre_grad dynamo/test_interop dynamo/test_model_output dynamo/test_modules \
-        dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles \
-        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
+        dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles --verbose
 
   # Inductor tests
   python test/run_test.py --include inductor/test_torchinductor inductor/test_benchmark_fusion inductor/test_codecache \
@@ -1401,8 +1397,7 @@ test_linux_aarch64() {
         inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \
         inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
         inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
-        inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \
-        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
+        inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes --verbose
 }
 
 if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
@@ -1484,7 +1479,7 @@ elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
   install_torchvision
   test_inductor_shard "${SHARD_NUMBER}"
   if [[ "${SHARD_NUMBER}" == 1 ]]; then
-    if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then
+    if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.8-gcc11-build ]]; then
       test_inductor_distributed
     fi
  fi

View File

@@ -24,12 +24,6 @@ call %INSTALLER_DIR%\install_sccache.bat
 if errorlevel 1 goto fail
 if not errorlevel 0 goto fail
 
-if "%USE_XPU%"=="1" (
-  :: Install xpu support packages
-  call %INSTALLER_DIR%\install_xpu.bat
-  if errorlevel 1 exit /b 1
-)
-
 :: Miniconda has been installed as part of the Windows AMI with all the dependencies.
 :: We just need to activate it here
 call %INSTALLER_DIR%\activate_miniconda3.bat
@@ -49,16 +43,6 @@ if "%VC_VERSION%" == "" (
 )
 if errorlevel 1 goto fail
 if not errorlevel 0 goto fail
 
-if "%USE_XPU%"=="1" (
-  :: Activate xpu environment - VS env is required for xpu
-  call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
-  if errorlevel 1 exit /b 1
-
-  :: Reduce build time. Only have MTL self-hosted runner now
-  SET TORCH_XPU_ARCH_LIST=xe-lpg
-  SET USE_KINETO=0
-)
-
 @echo on
 popd

View File

@@ -1,91 +0,0 @@
-@echo on
-REM Description: Install Intel Support Packages on Windows
-REM BKM reference: https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html
-
-set XPU_INSTALL_MODE=%~1
-if "%XPU_INSTALL_MODE%"=="" goto xpu_bundle_install_start
-if "%XPU_INSTALL_MODE%"=="bundle" goto xpu_bundle_install_start
-if "%XPU_INSTALL_MODE%"=="driver" goto xpu_driver_install_start
-if "%XPU_INSTALL_MODE%"=="all" goto xpu_driver_install_start
-
-:arg_error
-echo Illegal XPU installation mode. The value can be "bundle"/"driver"/"all"
-echo If keep the value as space, will use default "bundle" mode
-exit /b 1
-
-:xpu_driver_install_start
-:: TODO Need more testing for driver installation
-set XPU_DRIVER_LINK=https://downloadmirror.intel.com/830975/gfx_win_101.5972.exe
-curl -o xpu_driver.exe --retry 3 --retry-all-errors -k %XPU_DRIVER_LINK%
-echo "XPU Driver installing..."
-start /wait "Intel XPU Driver Installer" "xpu_driver.exe"
-if errorlevel 1 exit /b 1
-del xpu_driver.exe
-if "%XPU_INSTALL_MODE%"=="driver" goto xpu_install_end
-
-:xpu_bundle_install_start
-set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
-set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-for-pytorch-gpu-dev_p_0.5.3.37_offline.exe
-set XPU_PTI_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-pti-dev_p_0.9.0.37_offline.exe
-set XPU_BUNDLE_VERSION=0.5.3+31
-set XPU_PTI_VERSION=0.9.0+36
-set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.intel-for-pytorch-gpu-dev.product
-set XPU_PTI_PRODUCT_NAME=intel.oneapi.win.intel-pti-dev.product
-set XPU_BUNDLE_INSTALLED=0
-set XPU_PTI_INSTALLED=0
-set XPU_BUNDLE_UNINSTALL=0
-set XPU_PTI_UNINSTALL=0
-
-:: Check if XPU bundle is target version or already installed
-if exist "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" goto xpu_bundle_ver_check
-goto xpu_bundle_install
-
-:xpu_bundle_ver_check
-"%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --list-products > xpu_bundle_installed_ver.log
-for /f "tokens=1,2" %%a in (xpu_bundle_installed_ver.log) do (
-    if "%%a"=="%XPU_BUNDLE_PRODUCT_NAME%" (
-        echo %%a Installed Version: %%b
-        set XPU_BUNDLE_INSTALLED=1
-        if not "%XPU_BUNDLE_VERSION%"=="%%b" (
-            start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %XPU_BUNDLE_PRODUCT_NAME% --product-ver %%b --log-dir uninstall_bundle
-            set XPU_BUNDLE_UNINSTALL=1
-        )
-    )
-    if "%%a"=="%XPU_PTI_PRODUCT_NAME%" (
-        echo %%a Installed Version: %%b
-        set XPU_PTI_INSTALLED=1
-        if not "%XPU_PTI_VERSION%"=="%%b" (
-            start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %XPU_PTI_PRODUCT_NAME% --product-ver %%b --log-dir uninstall_bundle
-            set XPU_PTI_UNINSTALL=1
-        )
-    )
-)
-if errorlevel 1 exit /b 1
-if exist xpu_bundle_installed_ver.log del xpu_bundle_installed_ver.log
-if "%XPU_BUNDLE_INSTALLED%"=="0" goto xpu_bundle_install
-if "%XPU_BUNDLE_UNINSTALL%"=="1" goto xpu_bundle_install
-if "%XPU_PTI_INSTALLED%"=="0" goto xpu_pti_install
-if "%XPU_PTI_UNINSTALL%"=="1" goto xpu_pti_install
-goto xpu_install_end
-
-:xpu_bundle_install
-curl -o xpu_bundle.exe --retry 3 --retry-all-errors -k %XPU_BUNDLE_URL%
-echo "XPU Bundle installing..."
-start /wait "Intel Pytorch Bundle Installer" "xpu_bundle.exe" --action=install --eula=accept --silent --log-dir install_bundle
-if errorlevel 1 exit /b 1
-del xpu_bundle.exe
-
-:xpu_pti_install
-curl -o xpu_pti.exe --retry 3 --retry-all-errors -k %XPU_PTI_URL%
-echo "XPU PTI installing..."
-start /wait "Intel PTI Installer" "xpu_pti.exe" --action=install --eula=accept --silent --log-dir install_bundle
-if errorlevel 1 exit /b 1
-del xpu_pti.exe
-
-:xpu_install_end

View File

@@ -40,12 +40,6 @@ python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==
 # Install Z3 optional dependency for Windows builds.
 python -m pip install z3-solver==4.12.2.0
 
-# Install tlparse for test\dynamo\test_structured_trace.py UTs.
-python -m pip install tlparse==0.3.25
-
-# Install parameterized
-python -m pip install parameterized==0.8.1
-
 run_tests() {
     # Run nvidia-smi if available
     for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do

View File

@@ -119,11 +119,6 @@ fi
 
 # Test the package
 /builder/check_binary.sh
 
-if [[ "\$GPU_ARCH_TYPE" != *s390x* && "\$GPU_ARCH_TYPE" != *xpu* && "\$GPU_ARCH_TYPE" != *rocm* && "$PACKAGE_TYPE" != libtorch ]]; then
-  # Exclude s390, xpu, rocm and libtorch builds from smoke testing
-  python /builder/test/smoke_test/smoke_test.py --package=torchonly --torch-compile-check disabled
-fi
-
 # Clean temp files
 cd /builder && git clean -ffdx

View File

@@ -90,7 +90,7 @@ fi
 if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == "Linux" ]]; then
     TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
     if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
-        TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt)
+        TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-rocm.txt)
         TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
     fi
     if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then

View File

@@ -10,11 +10,6 @@ export SCCACHE_BUCKET=ossci-compiler-cache
 export SCCACHE_IGNORE_SERVER_IO_ERROR=1
 export VC_YEAR=2019
 
-if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
-    export VC_YEAR=2022
-    export USE_SCCACHE=0
-fi
-
 echo "Free space on filesystem before build:"
 df -h

View File

@@ -6,10 +6,6 @@ source "${BINARY_ENV_FILE:-/c/w/env}"
 export CUDA_VERSION="${DESIRED_CUDA/cu/}"
 export VC_YEAR=2019
 
-if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
-    export VC_YEAR=2022
-fi
-
 pushd "$BUILDER_ROOT"
 ./windows/internal/smoke_test.bat

View File

@@ -3,6 +3,8 @@ self-hosted-runner:
   # GitHub hosted x86 Linux runners
   - linux.20_04.4x
   - linux.20_04.16x
+  # Repo-specific LF hosted ARC runners
+  - linux.large.arc
   # Organization-wide AWS Linux Runners
   - linux.large
   - linux.2xlarge
@@ -32,6 +34,30 @@ self-hosted-runner:
   - lf.linux.8xlarge.nvidia.gpu
   - lf.linux.16xlarge.nvidia.gpu
   - lf.linux.g5.4xlarge.nvidia.gpu
+  # Organization-wide AWS Linux Runners with new Amazon 2023 AMI
+  - amz2023.linux.large
+  - amz2023.linux.2xlarge
+  - amz2023.linux.4xlarge
+  - amz2023.linux.12xlarge
+  - amz2023.linux.24xlarge
+  - amz2023.linux.arm64.2xlarge
+  - amz2023.linux.arm64.m7g.4xlarge
+  - amz2023.linux.arm64.m7g.4xlarge.ephemeral
+  - amz2023.linux.4xlarge.nvidia.gpu
+  - amz2023.linux.8xlarge.nvidia.gpu
+  - amz2023.linux.16xlarge.nvidia.gpu
+  - amz2023.linux.g5.4xlarge.nvidia.gpu
+  # Pytorch/pytorch AWS Linux Runners with the new Amazon 2023 AMI on Linux Foundation account
+  - amz2023.lf.linux.large
+  - amz2023.lf.linux.2xlarge
+  - amz2023.lf.linux.4xlarge
+  - amz2023.lf.linux.12xlarge
+  - amz2023.lf.linux.24xlarge
+  - amz2023.lf.linux.arm64.2xlarge
+  - amz2023.lf.linux.4xlarge.nvidia.gpu
+  - amz2023.lf.linux.8xlarge.nvidia.gpu
+  - amz2023.lf.linux.16xlarge.nvidia.gpu
+  - amz2023.lf.linux.g5.4xlarge.nvidia.gpu
   # Repo-specific IBM hosted S390x runner
   - linux.s390x
   # Organization wide AWS Windows runners
# Organization wide AWS Windows runners # Organization wide AWS Windows runners

View File

@@ -1 +1 @@
-ba696ea3dfec4cbe693bf06a84c75dc196077f5b
+97ed7b36b7a741253d4e41e4da3c901d83294503

View File

@@ -31,10 +31,6 @@
       - "module: flex attention"
     then:
       - "module: higher order operators"
-  - any:
-      - "module: aotinductor"
-    then:
-      - "oncall: export"
   - any:
       - "module: dynamo"
       - "module: pt2-dispatcher"

View File

@@ -7,14 +7,10 @@
 # runners. Runners listed here will be available as self hosted
 # runners, configuration is directly pulled from the main branch.
 #
+# NOTE (Apr, 5, 2021): Linux runners are currently all an amazonlinux2
 #
-# NOTES:
-#  - Linux runners are by default non-ephemeral to reduce the amount of CreateInstaces calls
-#    to avoid RequestLimitExceeded issues
-#  - When updating this file, run the following command to validate the YAML and to generate
-#    corresponding versions of scale-config for the pytorch/pytorch repo and merge the
-#    pytorch/pytorch changes before merging these changes.
-#    `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path_to_test-infra_root] --pytorch-repo-root [path_to_pytorch_root]``
+# NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the amount of CreateInstaces calls
+#    to avoid RequestLimitExceeded issues
 #
 # TODO: Add some documentation on how the auto-scaling works
 #
@@ -35,36 +31,58 @@ runner_types:
     is_ephemeral: false
     max_available: 1000
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.10xlarge.avx2:
     disk_size: 200
     instance_type: m4.10xlarge
     is_ephemeral: false
     max_available: 450
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.24xl.spr-metal:
     disk_size: 200
     instance_type: c7i.metal-24xl
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.16xlarge.spr:
     disk_size: 200
     instance_type: c7i.16xlarge
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.9xlarge.ephemeral:
     disk_size: 200
     instance_type: c5.9xlarge
     is_ephemeral: true
     max_available: 50
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
     variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
       am2:
         ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.12xlarge.ephemeral:
@@ -73,140 +91,240 @@ runner_types:
     is_ephemeral: true
     max_available: 300
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.16xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g3.16xlarge
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.24xlarge:
     disk_size: 150
     instance_type: c5.24xlarge
     is_ephemeral: false
     max_available: 500
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.24xlarge.ephemeral:
     disk_size: 150
     instance_type: c5.24xlarge
     is_ephemeral: true
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.2xlarge:
     disk_size: 150
     instance_type: c5.2xlarge
     is_ephemeral: false
     max_available: 3120
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.4xlarge:
     disk_size: 150
     instance_type: c5.4xlarge
     is_ephemeral: false
     max_available: 1000
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.4xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g3.4xlarge
     is_ephemeral: false
     max_available: 1000
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.8xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g3.8xlarge
     is_ephemeral: false
     max_available: 400
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.g4dn.12xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g4dn.12xlarge
     is_ephemeral: false
     max_available: 250
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.g4dn.metal.nvidia.gpu:
     disk_size: 150
     instance_type: g4dn.metal
     is_ephemeral: false
     max_available: 300
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.g5.48xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g5.48xlarge
     is_ephemeral: false
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.g5.12xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g5.12xlarge
     is_ephemeral: false
     max_available: 150
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.g5.4xlarge.nvidia.gpu:
     disk_size: 150
     instance_type: g5.4xlarge
     is_ephemeral: false
     max_available: 2400
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.g6.4xlarge.experimental.nvidia.gpu:
     disk_size: 150
     instance_type: g6.4xlarge
     is_ephemeral: false
     max_available: 50
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.large:
     max_available: 1200
     disk_size: 15
     instance_type: c5.large
     is_ephemeral: false
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
   lf.c.linux.arm64.2xlarge:
     disk_size: 256
     instance_type: t4g.2xlarge
     is_ephemeral: false
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
   lf.c.linux.arm64.m7g.4xlarge:
     disk_size: 256
     instance_type: m7g.4xlarge
     is_ephemeral: false
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
   lf.c.linux.arm64.2xlarge.ephemeral:
     disk_size: 256
     instance_type: t4g.2xlarge
     is_ephemeral: true
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
   lf.c.linux.arm64.m7g.4xlarge.ephemeral:
     disk_size: 256
     instance_type: m7g.4xlarge
     is_ephemeral: true
     max_available: 200
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
   lf.c.linux.arm64.m7g.metal:
     disk_size: 256
     instance_type: m7g.metal
     is_ephemeral: false
     max_available: 100
     os: linux
-    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
+    ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+    variants:
+      amz2023:
+        ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
+      am2:
+        ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
   lf.c.windows.g4dn.xlarge:
     disk_size: 256
     instance_type: g4dn.xlarge

View File

@ -7,14 +7,10 @@
# runners. Runners listed here will be available as self hosted # runners. Runners listed here will be available as self hosted
# runners, configuration is directly pulled from the main branch. # runners, configuration is directly pulled from the main branch.
# #
# NOTE (Apr 5, 2021): Linux runners are currently all amazonlinux2
# #
# NOTES: # NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the amount of CreateInstances calls
# - Linux runners are by default non-ephemeral to reduce the amount of CreateInstances calls # to avoid RequestLimitExceeded issues
# to avoid RequestLimitExceeded issues
# - When updating this file, run the following command to validate the YAML and to generate
# corresponding versions of scale-config for the pytorch/pytorch repo and merge the
# pytorch/pytorch changes before merging these changes.
# `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path_to_test-infra_root] --pytorch-repo-root [path_to_pytorch_root]`
# #
# TODO: Add some documentation on how the auto-scaling works # TODO: Add some documentation on how the auto-scaling works
# #
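As a rough illustration of what that validation step involves — the actual checks in `validate_scale_config.py` are not shown in this diff, so the required-field list below is inferred from the entries in this file, not taken from the script — loading and sanity-checking one runner entry looks roughly like this:

```python
# Hypothetical sanity check for a scale-config entry; the real
# validate_scale_config.py is not part of this diff, so the field list
# below is an assumption based on the runner entries in this file.
import yaml

ENTRY = """
lf.linux.2xlarge:
  disk_size: 150
  instance_type: c5.2xlarge
  is_ephemeral: false
  max_available: 3120
  os: linux
  ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
"""

REQUIRED = {"disk_size", "instance_type", "is_ephemeral", "os", "ami"}

for name, cfg in yaml.safe_load(ENTRY).items():
    missing = REQUIRED - cfg.keys()
    if missing:
        raise ValueError(f"{name} is missing fields: {sorted(missing)}")
    print(f"{name}: ok ({cfg['instance_type']}, ephemeral={cfg['is_ephemeral']})")
```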
@ -35,36 +31,58 @@ runner_types:
is_ephemeral: false is_ephemeral: false
max_available: 1000 max_available: 1000
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.10xlarge.avx2: lf.linux.10xlarge.avx2:
disk_size: 200 disk_size: 200
instance_type: m4.10xlarge instance_type: m4.10xlarge
is_ephemeral: false is_ephemeral: false
max_available: 450 max_available: 450
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.24xl.spr-metal: lf.linux.24xl.spr-metal:
disk_size: 200 disk_size: 200
instance_type: c7i.metal-24xl instance_type: c7i.metal-24xl
is_ephemeral: false is_ephemeral: false
max_available: 150 max_available: 150
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.16xlarge.spr: lf.linux.16xlarge.spr:
disk_size: 200 disk_size: 200
instance_type: c7i.16xlarge instance_type: c7i.16xlarge
is_ephemeral: false is_ephemeral: false
max_available: 150 max_available: 150
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.9xlarge.ephemeral: lf.linux.9xlarge.ephemeral:
disk_size: 200 disk_size: 200
instance_type: c5.9xlarge instance_type: c5.9xlarge
is_ephemeral: true is_ephemeral: true
max_available: 50 max_available: 50
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants: variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2: am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.12xlarge.ephemeral: lf.linux.12xlarge.ephemeral:
@ -73,140 +91,240 @@ runner_types:
is_ephemeral: true is_ephemeral: true
max_available: 300 max_available: 300
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.16xlarge.nvidia.gpu: lf.linux.16xlarge.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g3.16xlarge instance_type: g3.16xlarge
is_ephemeral: false is_ephemeral: false
max_available: 150 max_available: 150
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.24xlarge: lf.linux.24xlarge:
disk_size: 150 disk_size: 150
instance_type: c5.24xlarge instance_type: c5.24xlarge
is_ephemeral: false is_ephemeral: false
max_available: 500 max_available: 500
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.24xlarge.ephemeral: lf.linux.24xlarge.ephemeral:
disk_size: 150 disk_size: 150
instance_type: c5.24xlarge instance_type: c5.24xlarge
is_ephemeral: true is_ephemeral: true
max_available: 200 max_available: 200
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.2xlarge: lf.linux.2xlarge:
disk_size: 150 disk_size: 150
instance_type: c5.2xlarge instance_type: c5.2xlarge
is_ephemeral: false is_ephemeral: false
max_available: 3120 max_available: 3120
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.4xlarge: lf.linux.4xlarge:
disk_size: 150 disk_size: 150
instance_type: c5.4xlarge instance_type: c5.4xlarge
is_ephemeral: false is_ephemeral: false
max_available: 1000 max_available: 1000
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.4xlarge.nvidia.gpu: lf.linux.4xlarge.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g3.4xlarge instance_type: g3.4xlarge
is_ephemeral: false is_ephemeral: false
max_available: 1000 max_available: 1000
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.8xlarge.nvidia.gpu: lf.linux.8xlarge.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g3.8xlarge instance_type: g3.8xlarge
is_ephemeral: false is_ephemeral: false
max_available: 400 max_available: 400
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.g4dn.12xlarge.nvidia.gpu: lf.linux.g4dn.12xlarge.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g4dn.12xlarge instance_type: g4dn.12xlarge
is_ephemeral: false is_ephemeral: false
max_available: 250 max_available: 250
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.g4dn.metal.nvidia.gpu: lf.linux.g4dn.metal.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g4dn.metal instance_type: g4dn.metal
is_ephemeral: false is_ephemeral: false
max_available: 300 max_available: 300
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.g5.48xlarge.nvidia.gpu: lf.linux.g5.48xlarge.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g5.48xlarge instance_type: g5.48xlarge
is_ephemeral: false is_ephemeral: false
max_available: 200 max_available: 200
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.g5.12xlarge.nvidia.gpu: lf.linux.g5.12xlarge.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g5.12xlarge instance_type: g5.12xlarge
is_ephemeral: false is_ephemeral: false
max_available: 150 max_available: 150
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.g5.4xlarge.nvidia.gpu: lf.linux.g5.4xlarge.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g5.4xlarge instance_type: g5.4xlarge
is_ephemeral: false is_ephemeral: false
max_available: 2400 max_available: 2400
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.g6.4xlarge.experimental.nvidia.gpu: lf.linux.g6.4xlarge.experimental.nvidia.gpu:
disk_size: 150 disk_size: 150
instance_type: g6.4xlarge instance_type: g6.4xlarge
is_ephemeral: false is_ephemeral: false
max_available: 50 max_available: 50
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.large: lf.linux.large:
max_available: 1200 max_available: 1200
disk_size: 15 disk_size: 15
instance_type: c5.large instance_type: c5.large
is_ephemeral: false is_ephemeral: false
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-x86_64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
lf.linux.arm64.2xlarge: lf.linux.arm64.2xlarge:
disk_size: 256 disk_size: 256
instance_type: t4g.2xlarge instance_type: t4g.2xlarge
is_ephemeral: false is_ephemeral: false
max_available: 200 max_available: 200
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
lf.linux.arm64.m7g.4xlarge: lf.linux.arm64.m7g.4xlarge:
disk_size: 256 disk_size: 256
instance_type: m7g.4xlarge instance_type: m7g.4xlarge
is_ephemeral: false is_ephemeral: false
max_available: 200 max_available: 200
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
lf.linux.arm64.2xlarge.ephemeral: lf.linux.arm64.2xlarge.ephemeral:
disk_size: 256 disk_size: 256
instance_type: t4g.2xlarge instance_type: t4g.2xlarge
is_ephemeral: true is_ephemeral: true
max_available: 200 max_available: 200
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
lf.linux.arm64.m7g.4xlarge.ephemeral: lf.linux.arm64.m7g.4xlarge.ephemeral:
disk_size: 256 disk_size: 256
instance_type: m7g.4xlarge instance_type: m7g.4xlarge
is_ephemeral: true is_ephemeral: true
max_available: 200 max_available: 200
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
lf.linux.arm64.m7g.metal: lf.linux.arm64.m7g.metal:
disk_size: 256 disk_size: 256
instance_type: m7g.metal instance_type: m7g.metal
is_ephemeral: false is_ephemeral: false
max_available: 100 max_available: 100
os: linux os: linux
ami: al2023-ami-2023.5.202*-kernel-6.1-arm64 ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
variants:
amz2023:
ami: al2023-ami-2023.5.20240701.0-kernel-6.1-arm64
am2:
ami: amzn2-ami-hvm-2.0.20240306.2-arm64-gp2
lf.windows.g4dn.xlarge: lf.windows.g4dn.xlarge:
disk_size: 256 disk_size: 256
instance_type: g4dn.xlarge instance_type: g4dn.xlarge

View File

@ -86,18 +86,6 @@
- pull - pull
- inductor - inductor
- name: OSS CI / pytorchbot / slow tests
patterns:
- test/slow_tests.json
approved_by:
- pytorchbot
ignore_flaky_failures: false
mandatory_checks_name:
- EasyCLA
- Lint
- pull
- slow
- name: OSS CI /pytorchbot / Executorch - name: OSS CI /pytorchbot / Executorch
patterns: patterns:
- .ci/docker/ci_commit_pins/executorch.txt - .ci/docker/ci_commit_pins/executorch.txt
@ -119,8 +107,8 @@
mandatory_checks_name: mandatory_checks_name:
- EasyCLA - EasyCLA
- Lint - Lint
- pull / linux-focal-py3_9-clang9-xla / build - pull / linux-focal-py3_8-clang9-xla / build
- pull / linux-focal-py3_9-clang9-xla / test (xla, 1, 1, linux.12xlarge) - pull / linux-focal-py3_8-clang9-xla / test (xla, 1, 1, linux.12xlarge)
- name: Documentation - name: Documentation
patterns: patterns:
@ -544,7 +532,6 @@
- anijain2305 - anijain2305
- bdhirsh - bdhirsh
- zou3519 - zou3519
- isuruf
mandatory_checks_name: mandatory_checks_name:
- EasyCLA - EasyCLA
- Lint - Lint

View File

@ -9,7 +9,6 @@ ciflow_push_tags:
- ciflow/inductor-rocm - ciflow/inductor-rocm
- ciflow/inductor-perf-compare - ciflow/inductor-perf-compare
- ciflow/inductor-micro-benchmark - ciflow/inductor-micro-benchmark
- ciflow/inductor-micro-benchmark-cpu-x86
- ciflow/inductor-cu124 - ciflow/inductor-cu124
- ciflow/linux-aarch64 - ciflow/linux-aarch64
- ciflow/mps - ciflow/mps

View File

@ -1,7 +1,6 @@
boto3==1.19.12 boto3==1.19.12
hypothesis==6.56.4 hypothesis==6.56.4
expecttest==0.2.1 expecttest==0.1.6
fbscribelogger==0.1.6
librosa>=0.6.2 librosa>=0.6.2
mpmath==1.3.0 mpmath==1.3.0
networkx==2.8.7 networkx==2.8.7
@ -31,4 +30,3 @@ optree==0.12.1
# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in # NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
# which the stringify metadata is wrong when escaping double quote # which the stringify metadata is wrong when escaping double quote
protobuf==3.20.2 protobuf==3.20.2
parameterized==0.8.1

View File

@ -15,7 +15,9 @@ REPO_DIR = SCRIPT_DIR.parent.parent
def read_triton_pin(device: str = "cuda") -> str: def read_triton_pin(device: str = "cuda") -> str:
triton_file = "triton.txt" triton_file = "triton.txt"
if device == "xpu": if device == "rocm":
triton_file = "triton-rocm.txt"
elif device == "xpu":
triton_file = "triton-xpu.txt" triton_file = "triton-xpu.txt"
with open(REPO_DIR / ".ci" / "docker" / "ci_commit_pins" / triton_file) as f: with open(REPO_DIR / ".ci" / "docker" / "ci_commit_pins" / triton_file) as f:
return f.read().strip() return f.read().strip()
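The device-to-pin-file mapping restored above is small enough to sketch in isolation; this mirrors the restored branch (with `triton-rocm.txt` back for ROCm) rather than adding anything new:

```python
# Standalone sketch of the dispatch in read_triton_pin above: "rocm" maps to
# triton-rocm.txt again, "xpu" to triton-xpu.txt, everything else to triton.txt.
def pin_file(device: str = "cuda") -> str:
    if device == "rocm":
        return "triton-rocm.txt"
    elif device == "xpu":
        return "triton-xpu.txt"
    return "triton.txt"

assert pin_file("rocm") == "triton-rocm.txt"
assert pin_file("xpu") == "triton-xpu.txt"
assert pin_file() == "triton.txt"
```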

View File

@ -325,7 +325,6 @@ def generate_wheels_matrix(
os: str, os: str,
arches: Optional[List[str]] = None, arches: Optional[List[str]] = None,
python_versions: Optional[List[str]] = None, python_versions: Optional[List[str]] = None,
use_split_build: bool = False,
) -> List[Dict[str, str]]: ) -> List[Dict[str, str]]:
package_type = "wheel" package_type = "wheel"
if os == "linux" or os == "linux-aarch64" or os == "linux-s390x": if os == "linux" or os == "linux-aarch64" or os == "linux-s390x":
@ -341,7 +340,7 @@ def generate_wheels_matrix(
if os == "linux": if os == "linux":
arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES + XPU_ARCHES
elif os == "windows": elif os == "windows":
arches += CUDA_ARCHES + XPU_ARCHES arches += CUDA_ARCHES
elif os == "linux-aarch64": elif os == "linux-aarch64":
# Only want the one arch as the CPU type is different and # Only want the one arch as the CPU type is different and
# uses different build/test scripts # uses different build/test scripts
@ -372,17 +371,7 @@ def generate_wheels_matrix(
) and python_version == "3.13": ) and python_version == "3.13":
continue continue
if use_split_build and (
arch_version not in ["12.4", "12.1", "11.8", "cpu"] or os != "linux"
):
raise RuntimeError(
"Split build is only supported on linux with cuda 12.4, 12.1, 11.8, and cpu.\n"
f"Currently attempting to build on arch version {arch_version} and os {os}.\n"
"Please modify the matrix generation to exclude this combination."
)
# 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install # 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
if ( if (
arch_version in ["12.4", "12.1", "11.8"] arch_version in ["12.4", "12.1", "11.8"]
and os == "linux" and os == "linux"
@ -396,7 +385,6 @@ def generate_wheels_matrix(
"desired_cuda": translate_desired_cuda( "desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version gpu_arch_type, gpu_arch_version
), ),
"use_split_build": "True" if use_split_build else "False",
"devtoolset": ( "devtoolset": (
"cxx11-abi" if arch_version == "cuda-aarch64" else "" "cxx11-abi" if arch_version == "cuda-aarch64" else ""
), ),
@ -412,8 +400,7 @@ def generate_wheels_matrix(
), ),
} }
) )
# Special build to use on Colab. Python 3.11 for 12.1 CUDA if arch_version != "cuda-aarch64":
if python_version == "3.11" and arch_version == "12.1":
ret.append( ret.append(
{ {
"python_version": python_version, "python_version": python_version,
@ -422,16 +409,40 @@ def generate_wheels_matrix(
"desired_cuda": translate_desired_cuda( "desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version gpu_arch_type, gpu_arch_version
), ),
"use_split_build": "True" if use_split_build else "False", "use_split_build": "True",
"devtoolset": "", "devtoolset": "",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version], "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type, "package_type": package_type,
"pytorch_extra_install_requirements": "", "pytorch_extra_install_requirements": (
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace( # noqa: B950 PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version] # fmt: skip
if os != "linux-aarch64"
else ""
),
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-split".replace( # noqa: B950
".", "_" ".", "_"
), ),
} }
) )
# Special build to use on Colab. Python 3.10 for 12.1 CUDA
if python_version == "3.10" and arch_version == "12.1":
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"use_split_build": "False",
"devtoolset": "",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": "",
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace( # noqa: B950
".", "_"
),
}
)
else: else:
ret.append( ret.append(
{ {
@ -441,7 +452,6 @@ def generate_wheels_matrix(
"desired_cuda": translate_desired_cuda( "desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version gpu_arch_type, gpu_arch_version
), ),
"use_split_build": "True" if use_split_build else "False",
"devtoolset": ( "devtoolset": (
"cxx11-abi" if arch_version == "cpu-cxx11-abi" else "" "cxx11-abi" if arch_version == "cpu-cxx11-abi" else ""
), ),
@ -452,12 +462,11 @@ def generate_wheels_matrix(
), ),
"pytorch_extra_install_requirements": ( "pytorch_extra_install_requirements": (
PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"] # fmt: skip PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"] # fmt: skip
if os != "linux" and gpu_arch_type != "xpu" if os != "linux"
else "" else ""
), ),
} }
) )
return ret return ret
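For orientation, each element of the returned list is a flat dict of strings. The sketch below reconstructs one plausible entry from the fields assigned above; the concrete values, in particular the container image tag, are illustrative, not taken from this diff:

```python
# One illustrative matrix entry as generate_wheels_matrix would emit it; only
# the keys come from the code above, the values are made up for the example.
entry = {
    "python_version": "3.11",
    "gpu_arch_type": "cuda",
    "gpu_arch_version": "12.1",
    "desired_cuda": "cu121",
    "devtoolset": "",
    "container_image": "pytorch/manylinux-builder:cuda12.1",  # illustrative tag
    "package_type": "wheel",
    "pytorch_extra_install_requirements": "",
    "build_name": "wheel-py3_11-cuda12_1-full",  # "." replaced with "_"
}
print(entry["build_name"])
```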

View File

@ -61,7 +61,6 @@ class BinaryBuildWorkflow:
# Mainly for macos # Mainly for macos
cross_compile_arm64: bool = False cross_compile_arm64: bool = False
macos_runner: str = "macos-14-xlarge" macos_runner: str = "macos-14-xlarge"
use_split_build: bool = False
def __post_init__(self) -> None: def __post_init__(self) -> None:
if self.abi_version: if self.abi_version:
@ -70,9 +69,6 @@ class BinaryBuildWorkflow:
) )
else: else:
self.build_environment = f"{self.os}-binary-{self.package_type}" self.build_environment = f"{self.os}-binary-{self.package_type}"
if self.use_split_build:
# added to distinguish concurrency groups
self.build_environment += "-split"
def generate_workflow_file(self, workflow_template: jinja2.Template) -> None: def generate_workflow_file(self, workflow_template: jinja2.Template) -> None:
output_file_path = ( output_file_path = (
@ -114,20 +110,6 @@ LINUX_BINARY_BUILD_WORFKLOWS = [
isolated_workflow=True, isolated_workflow=True,
), ),
), ),
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="manywheel",
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.LINUX,
use_split_build=True,
arches=["11.8", "12.1", "12.4", "cpu"],
),
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_BINARIES, LABEL_CIFLOW_BINARIES_WHEEL},
isolated_workflow=True,
),
use_split_build=True,
),
BinaryBuildWorkflow( BinaryBuildWorkflow(
os=OperatingSystem.LINUX, os=OperatingSystem.LINUX,
package_type="conda", package_type="conda",
@ -180,21 +162,6 @@ LINUX_BINARY_SMOKE_WORKFLOWS = [
), ),
branches="main", branches="main",
), ),
BinaryBuildWorkflow(
os=OperatingSystem.LINUX,
package_type="manywheel",
build_configs=generate_binary_build_matrix.generate_wheels_matrix(
OperatingSystem.LINUX,
arches=["11.8", "12.1", "12.4"],
python_versions=["3.9"],
use_split_build=True,
),
ciflow_config=CIFlowConfig(
labels={LABEL_CIFLOW_PERIODIC},
),
branches="main",
use_split_build=True,
),
BinaryBuildWorkflow( BinaryBuildWorkflow(
os=OperatingSystem.LINUX, os=OperatingSystem.LINUX,
package_type="libtorch", package_type="libtorch",

View File

@ -46,24 +46,16 @@ def gh_fetch_url_and_headers(
with urlopen(Request(url, headers=headers, data=data_, method=method)) as conn: with urlopen(Request(url, headers=headers, data=data_, method=method)) as conn:
return conn.headers, reader(conn) return conn.headers, reader(conn)
except HTTPError as err: except HTTPError as err:
if ( if err.code == 403 and all(
err.code == 403 key in err.headers for key in ["X-RateLimit-Limit", "X-RateLimit-Used"]
and all(
key in err.headers
for key in ["X-RateLimit-Limit", "X-RateLimit-Remaining"]
)
and int(err.headers["X-RateLimit-Remaining"]) == 0
): ):
print( print(
f"""{url} f"""Rate limit exceeded:
Rate limit exceeded:
Used: {err.headers['X-RateLimit-Used']} Used: {err.headers['X-RateLimit-Used']}
Limit: {err.headers['X-RateLimit-Limit']} Limit: {err.headers['X-RateLimit-Limit']}
Remaining: {err.headers['X-RateLimit-Remaining']} Remaining: {err.headers['X-RateLimit-Remaining']}
Resets at: {err.headers['x-RateLimit-Reset']}""" Resets at: {err.headers['x-RateLimit-Reset']}"""
) )
else:
print(f"Error fetching {url} {err}")
raise raise
@ -168,14 +160,6 @@ def gh_post_commit_comment(
) )
def gh_close_pr(org: str, repo: str, pr_num: int, dry_run: bool = False) -> None:
url = f"{GITHUB_API_URL}/repos/{org}/{repo}/pulls/{pr_num}"
if dry_run:
print(f"Dry run closing PR {pr_num}")
else:
gh_fetch_url(url, method="PATCH", data={"state": "closed"})
def gh_delete_comment(org: str, repo: str, comment_id: int) -> None: def gh_delete_comment(org: str, repo: str, comment_id: int) -> None:
url = f"{GITHUB_API_URL}/repos/{org}/{repo}/issues/comments/{comment_id}" url = f"{GITHUB_API_URL}/repos/{org}/{repo}/issues/comments/{comment_id}"
gh_fetch_url(url, method="DELETE") gh_fetch_url(url, method="DELETE")
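The two 403 checks being swapped above differ in one detail worth spelling out: the removed variant additionally requires `X-RateLimit-Remaining` to be exactly 0, while the restored one treats any 403 carrying the rate-limit headers as rate limiting. A side-by-side sketch of just that predicate:

```python
# Side-by-side sketch of the two checks compared in gh_fetch_url_and_headers.
def is_rate_limited_strict(code: int, headers: dict) -> bool:
    # Removed variant: headers present *and* remaining quota is 0.
    return (
        code == 403
        and all(k in headers for k in ("X-RateLimit-Limit", "X-RateLimit-Remaining"))
        and int(headers["X-RateLimit-Remaining"]) == 0
    )

def is_rate_limited_loose(code: int, headers: dict) -> bool:
    # Restored variant: any 403 with rate-limit headers counts.
    return code == 403 and all(
        k in headers for k in ("X-RateLimit-Limit", "X-RateLimit-Used")
    )

print(is_rate_limited_strict(403, {"X-RateLimit-Limit": "60", "X-RateLimit-Remaining": "0"}))  # True
print(is_rate_limited_loose(403, {"X-RateLimit-Limit": "60", "X-RateLimit-Used": "60"}))       # True
```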

View File

@ -17,11 +17,6 @@ if [[ -d "${CACHE_DIRECTORY}" ]]; then
cp -r "${CACHE_DIRECTORY}" . || true cp -r "${CACHE_DIRECTORY}" . || true
fi fi
# if lintrunner is not installed, install it
if ! command -v lintrunner &> /dev/null; then
python3 -m pip install lintrunner==0.12.5
fi
# This has already been cached in the docker image # This has already been cached in the docker image
lintrunner init 2> /dev/null lintrunner init 2> /dev/null
@ -38,7 +33,7 @@ python3 torch/utils/data/datapipes/gen_pyi.py
RC=0 RC=0
# Run lintrunner on all files # Run lintrunner on all files
if ! lintrunner --force-color --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then if ! lintrunner --force-color --all-files --tee-json=lint.json ${ADDITIONAL_LINTRUNNER_ARGS} 2> /dev/null; then
echo "" echo ""
echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m" echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m" echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"

View File

@ -3,94 +3,49 @@
""" """
This runner determinator is used to determine which set of runners to run a This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default GitHub job on. It uses the first comment of a GitHub issue (by default
https://github.com/pytorch/test-infra/issues/5132) to define the configuration https://github.com/pytorch/test-infra/issues/5132) as a user list to determine
of which runners should be used to run which job. which users will get their jobs to run on experimental runners. This user list
is also a comma-separated list of additional features or experiments which the
The configuration has two parts, the settings and a list of opted-in users, user could be opted in to.
separated by a line containing "---". If the line is not present, the
settings are considered to be empty with only the second part, the user
list, defined.
The first part is a YAML block that defines the rollout settings. This can be
used to define any settings that are needed to determine which runners to use.
Its fields are defined by the Settings class below.
The second part is a list of users who are explicitly opted in to the LF fleet.
The user list is also a comma-separated list of additional features or
experiments which the user could be opted in to.
The user list has the following rules: The user list has the following rules:
- Users are GitHub usernames, which must start with the @ prefix - Users are GitHub usernames with the @ prefix
- If the first line is a "*" then all users will use the new runners
- If the first line is a "!" then all users will use the old runners
- Each user is also a comma-separated list of features/experiments to enable - Each user is also a comma-separated list of features/experiments to enable
- A "#" prefix opts the user out of all experiments - A "#" prefix indicates the user is opted out of the new runners but is opting
into features/experiments.
Example config: Example user list:
# A list of experiments that can be opted into.
# This defines the behavior they'll induce when opted into.
# Expected syntax is:
# [experiment_name]: # Name of the experiment. Also used for the label prefix.
# rollout_perc: [int] # % of workflows to run with this experiment when users are not opted in.
experiments: @User1
lf: @User2,amz2023
rollout_perc: 25 #@UserOptOutOfNewRunner,amz2023
---
# Opt-ins:
# Users can opt into the LF fleet by adding their GitHub username to this list
# and specifying experiments to enable in a comma-separated list.
# Experiments should be from the above list.
@User1,lf,split_build
@User2,lf
@User3,split_build
""" """
import logging import logging
import os import os
import random
from argparse import ArgumentParser from argparse import ArgumentParser
from logging import LogRecord from logging import LogRecord
from typing import Any, Dict, Iterable, List, NamedTuple, Tuple from typing import Any, Iterable
import yaml
from github import Auth, Github from github import Auth, Github
from github.Issue import Issue from github.Issue import Issue
DEFAULT_LABEL_PREFIX = "" # use meta runners WORKFLOW_LABEL_META = "" # use meta runners
WORKFLOW_LABEL_LF = "lf." # use runners from the linux foundation WORKFLOW_LABEL_LF = "lf." # use runners from the linux foundation
WORKFLOW_LABEL_LF_CANARY = "lf.c." # use canary runners from the linux foundation WORKFLOW_LABEL_LF_CANARY = "lf.c." # use canary runners from the linux foundation
RUNNER_AMI_LEGACY = ""
RUNNER_AMI_AMZ2023 = "amz2023"
GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "") GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "")
GH_OUTPUT_KEY_AMI = "runner-ami" GH_OUTPUT_KEY_AMI = "runner-ami"
GH_OUTPUT_KEY_LABEL_TYPE = "label-type" GH_OUTPUT_KEY_LABEL_TYPE = "label-type"
SETTING_EXPERIMENTS = "experiments"
LF_FLEET_EXPERIMENT = "lf"
CANARY_FLEET_SUFFIX = ".c"
class Experiment(NamedTuple):
rollout_perc: float = (
0 # Percentage of workflows to experiment on when user is not opted-in.
)
# Add more fields as needed
class Settings(NamedTuple):
"""
Settings for the experiments that can be opted into.
"""
experiments: Dict[str, Experiment] = {}
class ColorFormatter(logging.Formatter): class ColorFormatter(logging.Formatter):
"""Color codes the log messages based on the log level""" """Color codes the log messages based on the log level"""
@ -182,14 +137,11 @@ def get_issue(gh: Github, repo: str, issue_num: int) -> Issue:
def get_potential_pr_author( def get_potential_pr_author(
github_token: str, repo: str, username: str, ref_type: str, ref_name: str gh: Github, repo: str, username: str, ref_type: str, ref_name: str
) -> str: ) -> str:
# If the trigger was a new tag added by a bot, this is a ciflow case # If the trigger was a new tag added by a bot, this is a ciflow case
# Fetch the actual username from the original PR. The PR number is # Fetch the actual username from the original PR. The PR number is
# embedded in the tag name: ciflow/<name>/<pr-number> # embedded in the tag name: ciflow/<name>/<pr-number>
gh = get_gh_client(github_token)
if username == "pytorch-bot[bot]" and ref_type == "tag": if username == "pytorch-bot[bot]" and ref_type == "tag":
split_tag = ref_name.split("/") split_tag = ref_name.split("/")
if ( if (
@ -211,233 +163,126 @@ def get_potential_pr_author(
def is_exception_branch(branch: str) -> bool: def is_exception_branch(branch: str) -> bool:
"""
Branches that get opted out of all experiments and should always use Meta runners
"""
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"} return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
def load_yaml(yaml_text: str) -> Any: def get_workflow_type(issue: Issue, workflow_requestors: Iterable[str]) -> str:
try: try:
data = yaml.safe_load(yaml_text) first_comment = issue.get_comments()[0].body.strip("\n\t ")
return data
except yaml.YAMLError as exc:
log.exception("Error loading YAML")
raise
if first_comment[0] == "!":
def extract_settings_user_opt_in_from_text(rollout_state: str) -> Tuple[str, str]: log.info("LF Workflows are disabled for everyone. Using meta runners.")
""" return WORKFLOW_LABEL_META
Extracts the text with settings, if any, and the opted in users from the rollout state. elif first_comment[0] == "*":
log.info("LF Workflows are enabled for everyone. Using LF runners.")
If the issue body contains "---" then the text above that is the settings return WORKFLOW_LABEL_LF
and the text below is the list of opted in users. else:
all_opted_in_users = {
If it doesn't contain "---" then the settings are empty and the rest is the users. usr_raw.strip("\n\t@ ").split(",")[0]
""" for usr_raw in first_comment.split()
rollout_state_parts = rollout_state.split("---") }
if len(rollout_state_parts) >= 2: opted_in_requestors = {
return rollout_state_parts[0], rollout_state_parts[1] usr for usr in workflow_requestors if usr in all_opted_in_users
else: }
return "", rollout_state if opted_in_requestors:
class UserOptins(Dict[str, List[str]]):
"""
Dictionary of users with a list of features they have opted into
"""
def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins:
"""
Parse the user opt-in text into a key value pair of username and the list of features they have opted into
Users are GitHub usernames with the @ prefix. Each user is also a comma-separated list of features/experiments to enable.
- Example line: "@User1,lf,split_build"
- A "#" prefix indicates the user is opted out of all experiments
"""
optins = UserOptins()
for user in user_optin_text.split("\n"):
user = user.strip("\r\n\t -")
if not user or not user.startswith("@"):
# Not a valid user. Skip
continue
if user:
usr_name = user.split(",")[0].strip("@")
optins[usr_name] = [exp.strip(" ") for exp in user.split(",")[1:]]
return optins
def parse_settings_from_text(settings_text: str) -> Settings:
"""
Parse the experiments from the issue body into a list of ExperimentSettings
"""
try:
if settings_text:
# Escape the backtick as well so that we can have the settings in a code block on the GH issue
# for easy reading
# Note: Using ascii for the backtick so that the cat step in _runner-determinator.yml doesn't choke on
# the backtick character in shell commands.
backtick = chr(96) # backtick character
settings_text = settings_text.strip(f"\r\n\t{backtick} ")
settings = load_yaml(settings_text)
# For now we just load experiments. We can expand this if/when we add more settings
experiments = {}
for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
valid_settings = {}
for setting in exp_settings:
if setting not in Experiment._fields:
log.warning(
f"Unexpected setting in experiment: {setting} = {exp_settings[setting]}"
)
else:
valid_settings[setting] = exp_settings[setting]
experiments[exp_name] = Experiment(**valid_settings)
return Settings(experiments)
except Exception:
log.exception("Failed to parse settings")
return Settings()
def parse_settings(rollout_state: str) -> Settings:
"""
Parse settings, if any, from the rollout state.
If the issue body contains "---" then the text above that is the settings
and the text below is the list of opted in users.
If it doesn't contain "---" then the settings are empty and the default values are used.
"""
settings_text, _ = extract_settings_user_opt_in_from_text(rollout_state)
return parse_settings_from_text(settings_text)
def parse_users(rollout_state: str) -> UserOptins:
"""
Parse users from the rollout state.
"""
_, users_text = extract_settings_user_opt_in_from_text(rollout_state)
return parse_user_opt_in_from_text(users_text)
def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
"""
Check if a user is opted into an experiment
"""
return experiment_name in user_optins.get(user, [])
def get_runner_prefix(
rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
) -> str:
settings = parse_settings(rollout_state)
user_optins = parse_users(rollout_state)
fleet_prefix = ""
prefixes = []
for experiment_name, experiment_settings in settings.experiments.items():
enabled = False
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor
for requestor in workflow_requestors
if is_user_opted_in(requestor, user_optins, experiment_name)
]
if opted_in_users:
log.info(
f"{', '.join(opted_in_users)} have opted into experiment {experiment_name}."
)
enabled = True
elif experiment_settings.rollout_perc:
# If no user is opted in, then we randomly enable the experiment based on the rollout percentage
if random.uniform(0, 100) <= experiment_settings.rollout_perc:
log.info( log.info(
f"Based on rollout percentage of {experiment_settings.rollout_perc}%, enabling experiment {experiment_name}." f"LF Workflows are enabled for {', '.join(opted_in_requestors)}. Using LF runners."
) )
enabled = True return WORKFLOW_LABEL_LF
if enabled:
label = experiment_name
if experiment_name == LF_FLEET_EXPERIMENT:
# We give some special treatment to the "lf" experiment since it determines the fleet we use
# - If it's enabled, then we always list its prefix first
# - If we're in the canary branch, then we append ".c" to the lf prefix
if is_canary:
label += CANARY_FLEET_SUFFIX
fleet_prefix = label
else: else:
prefixes.append(label) log.info(
f"LF Workflows are disabled for {', '.join(workflow_requestors)}. Using meta runners."
)
return WORKFLOW_LABEL_META
if len(prefixes) > 1: except Exception as e:
log.error( log.error(
f"Only a fleet and one other experiment can be enabled for a job at any time. Enabling {prefixes[0]} and ignoring the rest, which are {', '.join(prefixes[1:])}" f"Failed to get determine workflow type. Falling back to meta runners. Exception: {e}"
) )
prefixes = prefixes[:1] return WORKFLOW_LABEL_META
# Fleet always comes first
if fleet_prefix:
prefixes.insert(0, fleet_prefix)
return ".".join(prefixes) + "." if prefixes else ""
def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -> str: def get_optin_feature(
""" issue: Issue, workflow_requestors: Iterable[str], feature: str, fallback: str
Gets the first comment of the issue, which contains the desired rollout state. ) -> str:
try:
first_comment = issue.get_comments()[0].body.strip("\n\t ")
userlist = {u.lstrip("#").strip("\n\t@ ") for u in first_comment.split()}
all_opted_in_users = set()
for user in userlist:
for i in user.split(","):
if i == feature:
all_opted_in_users.add(user.split(",")[0])
opted_in_requestors = {
usr for usr in workflow_requestors if usr in all_opted_in_users
}
The default issue we use - https://github.com/pytorch/test-infra/issues/5132 if opted_in_requestors:
""" log.info(
gh = get_gh_client(github_token) f"Feature {feature} is enabled for {', '.join(opted_in_requestors)}. Using feature {feature}."
issue = get_issue(gh, repo, issue_num) )
return str(issue.get_comments()[0].body.strip("\n\t ")) return feature
else:
log.info(
f"Feature {feature} is disabled for {', '.join(workflow_requestors)}. Using fallback \"{fallback}\"."
)
return fallback
except Exception as e:
log.error(
f'Failed to determine if user has opted-in to feature {feature}. Using fallback "{fallback}". Exception: {e}'
)
return fallback
def main() -> None: def main() -> None:
args = parse_args() args = parse_args()
if args.github_ref_type == "branch" and is_exception_branch(args.github_branch): if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
log.info( log.info(f"Exception branch: '{args.github_branch}', using meta runners")
f"Exception branch: '{args.github_branch}', using Meta runners and no experiments." label_type = WORKFLOW_LABEL_META
) runner_ami = RUNNER_AMI_LEGACY
runner_label_prefix = DEFAULT_LABEL_PREFIX
else: else:
try: try:
rollout_state = get_rollout_state_from_issue( gh = get_gh_client(args.github_token)
args.github_token, args.github_issue_repo, args.github_issue # The default issue we use - https://github.com/pytorch/test-infra/issues/5132
) issue = get_issue(gh, args.github_issue_repo, args.github_issue)
username = get_potential_pr_author( username = get_potential_pr_author(
args.github_token, gh,
args.github_repo, args.github_repo,
args.github_actor, args.github_actor,
args.github_ref_type, args.github_ref_type,
args.github_branch, args.github_branch,
) )
label_type = get_workflow_type(
is_canary = args.github_repo == "pytorch/pytorch-canary" issue,
(
runner_label_prefix = get_runner_prefix( args.github_issue_owner,
rollout_state, (args.github_issue_owner, username), is_canary username,
),
)
runner_ami = get_optin_feature(
issue=issue,
workflow_requestors=(
args.github_issue_owner,
username,
),
feature=RUNNER_AMI_AMZ2023,
fallback=RUNNER_AMI_LEGACY,
) )
except Exception as e: except Exception as e:
log.error( log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}" f"Failed to get issue. Falling back to meta runners. Exception: {e}"
) )
label_type = WORKFLOW_LABEL_META
runner_ami = RUNNER_AMI_LEGACY
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix) # For Canary builds use canary runners
if args.github_repo == "pytorch/pytorch-canary" and label_type == WORKFLOW_LABEL_LF:
label_type = WORKFLOW_LABEL_LF_CANARY
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, label_type)
set_github_output(GH_OUTPUT_KEY_AMI, runner_ami)
if __name__ == "__main__": if __name__ == "__main__":
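To make the behavioral difference concrete: the settings-based determinator being removed here turns an issue body like the one below into a runner-label prefix. The expected outputs are taken from the unit tests deleted later in this diff, not re-derived:

```python
# Illustrative input for the removed get_runner_prefix; expected prefixes
# match the deleted test suite below.
rollout_state = """\
experiments:
  lf:
    rollout_perc: 0
  otherExp:
    rollout_perc: 0
---
@User1,lf
@User2,lf,otherExp
"""
# get_runner_prefix(rollout_state, ["User1"])  -> "lf."
# get_runner_prefix(rollout_state, ["User2"])  -> "lf.otherExp."
# get_runner_prefix(rollout_state, ["User3"])  -> ""  (not opted in, 0% rollout)
```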

View File

@ -3,7 +3,7 @@
## Install prerequisites. ## Install prerequisites.
``` ```
$ sudo dnf install podman podman-docker jq $ sudo dnf install docker
``` ```
## Add services. ## Add services.
@ -27,48 +27,23 @@ $ sudo systemctl enable --now qemu-user-static
## Rebuild the image ## Rebuild the image
First build the s390x builder image `docker.io/pytorch/manylinuxs390x-builder`, In order to build or update the `iiilinuxibmcom/actions-runner` image, e.g. to get the
using the following commands: latest OS security fixes, use the following commands:
```
$ cd ~
$ git clone https://github.com/pytorch/pytorch
$ cd pytorch
$ git submodule update --init --recursive
$ GPU_ARCH_TYPE=cpu-s390x "$(pwd)/.ci/docker/manywheel/build.sh" manylinuxs390x-builder
$ docker image tag localhost/pytorch/manylinuxs390x-builder docker.io/pytorch/manylinuxs390x-builder:cpu-s390x
$ docker image save -o ~/manywheel-s390x.tar docker.io/pytorch/manylinuxs390x-builder:cpu-s390x
```
The next step is to build the `actions-runner` image using:
``` ```
$ cd self-hosted-builder $ cd self-hosted-builder
$ sudo docker build \ $ sudo docker build \
--build-arg repo=<owner>/<name> \
--build-arg token=<***> \
--pull \ --pull \
-f actions-runner.Dockerfile \ -f actions-runner.Dockerfile \
-t iiilinuxibmcom/actions-runner.<name> \ -t iiilinuxibmcom/actions-runner \
. .
``` ```
If there are failures, ensure that selinux doesn't prevent it from working. If it fails, ensure that selinux doesn't prevent it from working.
In the worst case, selinux can be disabled with `setenforce 0`. In the worst case, selinux can be disabled with `setenforce 0`.
Now prepare all necessary files for runner registration:
```
$ sudo mkdir -p /etc/actions-runner/<name>
$ sudo chmod 700 /etc/actions-runner/<name>
$ sudo /bin/cp <github_app_private_key_file> /etc/actions-runner/<name>/key_private.pem
$ sudo echo <github_app_id> | sudo tee /etc/actions-runner/<name>/appid.env
$ sudo echo <github_app_install_id> | sudo tee /etc/actions-runner/<name>/installid.env
$ sudo echo NAME=<worker_name> | sudo tee /etc/actions-runner/<name>/env
$ sudo echo ORG=<github_org> | sudo tee -a /etc/actions-runner/<name>/env
$ cd self-hosted-builder
$ sudo /bin/cp helpers/*.sh /usr/local/bin/
$ sudo chmod 755 /usr/local/bin/app_token.sh /usr/local/bin/gh_token_generator.sh
```
## Autostart the runner. ## Autostart the runner.
``` ```

View File

@ -1,12 +1,12 @@
# Self-Hosted IBM Z Github Actions Runner. # Self-Hosted IBM Z Github Actions Runner.
# Temporary image: amd64 dependencies. # Temporary image: amd64 dependencies.
FROM docker.io/amd64/ubuntu:23.10 as ld-prefix FROM docker.io/amd64/ubuntu:22.04 as ld-prefix
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get -y install ca-certificates libicu72 libssl3 RUN apt-get update && apt-get -y install ca-certificates libicu70 libssl3
# Main image. # Main image.
FROM docker.io/s390x/ubuntu:23.10 FROM docker.io/s390x/ubuntu:22.04
# Packages for pytorch building and testing. # Packages for pytorch building and testing.
ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_FRONTEND=noninteractive
@ -16,7 +16,6 @@ RUN apt-get update && apt-get -y install \
gcc \ gcc \
git \ git \
jq \ jq \
zip \
libxml2-dev \ libxml2-dev \
libxslt-dev \ libxslt-dev \
ninja-build \ ninja-build \
@ -44,28 +43,24 @@ COPY fs/ /
RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint RUN chmod +x /usr/bin/actions-runner /usr/bin/entrypoint
# install podman
RUN apt -y install podman podman-docker
# amd64 Github Actions Runner. # amd64 Github Actions Runner.
RUN useradd -m actions-runner RUN useradd -m actions-runner
USER actions-runner USER actions-runner
WORKDIR /home/actions-runner WORKDIR /home/actions-runner
RUN curl -L https://github.com/actions/runner/releases/download/v2.309.0/actions-runner-linux-x64-2.309.0.tar.gz | tar -xz
# set up python virtual environment which is later used by the runner. # repository
# build workflows use "python -m pip install ...", ARG repo
# and it doesn't work for a non-root user
RUN virtualenv --system-site-packages venv
# copy prebuilt manywheel docker image for builds and tests # repository token
# build command is: ARG token
# GPU_ARCH_TYPE=cpu-s390x "$(pwd)/manywheel/build_docker.sh"
# and save command is:
# docker image save -o manywheel-s390x.tar pytorch/manylinuxs390x-builder:cpu-s390x
#
COPY --chown=actions-runner:actions-runner manywheel-s390x.tar /home/actions-runner/manywheel-s390x.tar
RUN curl -L https://github.com/actions/runner/releases/download/v2.317.0/actions-runner-linux-x64-2.317.0.tar.gz | tar -xz RUN ./config.sh \
--unattended \
--url "https://github.com/${repo}" \
--token "${token}" \
--no-default-labels \
--labels self-hosted,linux.s390x
ENTRYPOINT ["/usr/bin/entrypoint"] ENTRYPOINT ["/usr/bin/entrypoint"]
CMD ["/usr/bin/actions-runner"] CMD ["/usr/bin/actions-runner"]

View File

@ -8,16 +8,12 @@ StartLimitIntervalSec=0
Type=simple Type=simple
Restart=always Restart=always
ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i ExecStartPre=-/usr/bin/docker rm --force actions-runner.%i
ExecStartPre=-/usr/local/bin/gh_token_generator.sh /etc/actions-runner/%i/appid.env /etc/actions-runner/%i/installid.env /etc/actions-runner/%i/key_private.pem /etc/actions-runner/%i/ghtoken.env
ExecStart=/usr/bin/docker run \ ExecStart=/usr/bin/docker run \
--env-file=/etc/actions-runner/%i/env \
--env-file=/etc/actions-runner/%i/ghtoken.env \
--init \ --init \
--interactive \ --interactive \
--name=actions-runner.%i \ --name=actions-runner.%i \
--rm \ --rm \
--privileged \ iiilinuxibmcom/actions-runner
iiilinuxibmcom/actions-runner.%i
ExecStop=/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1" ExecStop=/bin/sh -c "docker exec actions-runner.%i kill -INT -- -1"
ExecStop=/bin/sh -c "docker wait actions-runner.%i" ExecStop=/bin/sh -c "docker wait actions-runner.%i"
ExecStop=/bin/sh -c "docker rm actions-runner.%i" ExecStop=/bin/sh -c "docker rm actions-runner.%i"

View File

@ -2,45 +2,5 @@
set -e -u set -e -u
# first import docker image
if [ -f ./manywheel-s390x.tar ] ; then
docker image load --input manywheel-s390x.tar
docker image tag docker.io/pytorch/manylinuxs390x-builder:cpu-s390x docker.io/pytorch/manylinuxs390x-builder:cpu-s390x-main
rm -f manywheel-s390x.tar
fi
token_file=registration-token.json
# Generate registration token
curl \
-X POST \
-H "Accept: application/vnd.github.v3+json" \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
"https://api.github.com/orgs/${ORG}/actions/runners/registration-token" \
-o "$token_file"
unset ACCESS_TOKEN
# register runner as ephemeral runner
# it does one job, stops and unregisters
registration_token=$(jq --raw-output .token "$token_file")
./config.sh \
--unattended \
--ephemeral \
--url "https://github.com/${ORG}" \
--token "${registration_token}" \
--name "${NAME}" \
--no-default-labels \
--labels self-hosted,linux.s390x
unset registration_token
rm -f "$token_file"
# enter the python virtual environment.
# build workflows use "python -m pip install ...",
# and it doesn't work for a non-root user
source venv/bin/activate
# Run one job. # Run one job.
./run.sh ./run.sh --once
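The registration-token request at the heart of the script above is a single authenticated POST; here is the same call sketched in Python with only the stdlib, with ORG and ACCESS_TOKEN read from the environment as in the shell version:

```python
# Python sketch of the curl call above: request a runner registration token
# for the organization, then print the short-lived token.
import json
import os
from urllib.request import Request, urlopen

req = Request(
    f"https://api.github.com/orgs/{os.environ['ORG']}/actions/runners/registration-token",
    method="POST",
    headers={
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"Bearer {os.environ['ACCESS_TOKEN']}",
    },
)
with urlopen(req) as resp:
    print(json.load(resp)["token"])
```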

View File

@ -1,84 +0,0 @@
#!/usr/bin/env bash
#
# Request an ACCESS_TOKEN to be used by a GitHub APP
# Environment variable that need to be set up:
# * APP_ID, the GitHub's app ID
# * INSTALL_ID, the Github's app's installation ID
# * APP_PRIVATE_KEY, the content of GitHub app's private key in PEM format.
#
# https://github.com/orgs/community/discussions/24743#discussioncomment-3245300
#
set -o pipefail
_GITHUB_HOST=${GITHUB_HOST:="github.com"}
# If URL is not github.com then use the enterprise api endpoint
if [[ ${GITHUB_HOST} = "github.com" ]]; then
URI="https://api.${_GITHUB_HOST}"
else
URI="https://${_GITHUB_HOST}/api/v3"
fi
API_VERSION=v3
API_HEADER="Accept: application/vnd.github.${API_VERSION}+json"
CONTENT_LENGTH_HEADER="Content-Length: 0"
APP_INSTALLATIONS_URI="${URI}/app/installations"
# JWT parameters based off
# https://docs.github.com/en/developers/apps/building-github-apps/authenticating-with-github-apps#authenticating-as-a-github-app
#
# JWT token issuance and expiration parameters
JWT_IAT_DRIFT=60
JWT_EXP_DELTA=600
JWT_JOSE_HEADER='{
"alg": "RS256",
"typ": "JWT"
}'
build_jwt_payload() {
now=$(date +%s)
iat=$((now - JWT_IAT_DRIFT))
jq -c \
--arg iat_str "${iat}" \
--arg exp_delta_str "${JWT_EXP_DELTA}" \
--arg app_id_str "${APP_ID}" \
'
($iat_str | tonumber) as $iat
| ($exp_delta_str | tonumber) as $exp_delta
| ($app_id_str | tonumber) as $app_id
| .iat = $iat
| .exp = ($iat + $exp_delta)
| .iss = $app_id
' <<< "{}" | tr -d '\n'
}
base64url() {
base64 | tr '+/' '-_' | tr -d '=\n'
}
rs256_sign() {
openssl dgst -binary -sha256 -sign <(echo "$1")
}
request_access_token() {
jwt_payload=$(build_jwt_payload)
encoded_jwt_parts=$(base64url <<<"${JWT_JOSE_HEADER}").$(base64url <<<"${jwt_payload}")
encoded_mac=$(echo -n "$encoded_jwt_parts" | rs256_sign "${APP_PRIVATE_KEY}" | base64url)
generated_jwt="${encoded_jwt_parts}.${encoded_mac}"
auth_header="Authorization: Bearer ${generated_jwt}"
app_installations_response=$(curl -sX POST \
-H "${auth_header}" \
-H "${API_HEADER}" \
--header "X-GitHub-Api-Version: 2022-11-28" \
--url "https://api.github.com/app/installations/${INSTALL_ID}/access_tokens" \
)
echo "$app_installations_response" | jq --raw-output '.token'
}
request_access_token
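For readers more comfortable in Python, the same App-token flow can be sketched with PyJWT and requests. This is an assumption-laden equivalent (both are third-party packages, and RS256 needs the `cryptography` backend), not a drop-in replacement for the script above:

```python
# Python sketch of app_token.sh: build an RS256 app JWT, then exchange it for
# an installation access token. APP_ID, INSTALL_ID and APP_PRIVATE_KEY come
# from the environment exactly as in the shell version.
import os
import time

import jwt       # PyJWT, assumed installed with the cryptography backend
import requests  # assumed installed

now = int(time.time())
app_jwt = jwt.encode(
    {"iat": now - 60, "exp": now + 600, "iss": int(os.environ["APP_ID"])},
    os.environ["APP_PRIVATE_KEY"],
    algorithm="RS256",
)

resp = requests.post(
    f"https://api.github.com/app/installations/{os.environ['INSTALL_ID']}/access_tokens",
    headers={
        "Authorization": f"Bearer {app_jwt}",
        "Accept": "application/vnd.github.v3+json",
        "X-GitHub-Api-Version": "2022-11-28",
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["token"])
```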

View File

@ -1,10 +0,0 @@
#!/usr/bin/env bash
SCRIPT_DIR=$(dirname "$0")
APP_ID=$1
INSTALL_ID=$2
APP_PRIVATE_KEY=$3
DST_FILE="$4"
ACCESS_TOKEN="$(APP_ID="$(<"${APP_ID}")" INSTALL_ID="$(<"${INSTALL_ID}")" APP_PRIVATE_KEY="$(<"${APP_PRIVATE_KEY}")" "${SCRIPT_DIR}/app_token.sh")"
echo "ACCESS_TOKEN=${ACCESS_TOKEN}" > "${DST_FILE}"

View File

@ -0,0 +1,35 @@
#!/bin/bash
set -eoux pipefail
SYNC_BRANCH=pytorch-stable-prototype
git config user.email "fake@example.com"
git config user.name "PyTorch Stable Bot"
git fetch origin main
git fetch origin "$SYNC_BRANCH"
git checkout "$SYNC_BRANCH"
# Using a hardcoded SHA here is a massive speedup as we can skip the entire history of the pytorch GitHub repo.
# This specific SHA was chosen as it was before the "branch point" of the stable branch
for SHA in $(git log ba3b05fdf37ddbc3c301294d6a560a816335e717..origin/main --pretty="%h" -- torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed)
do
# `git merge-base --is-ancestor` exits with code 0 if the given SHA is an ancestor, and non-0 otherwise
if git merge-base --is-ancestor $SHA HEAD || [[ $(git log --grep="(cherry picked from commit $SHA") ]]
then
echo "Skipping $SHA"
continue
fi
echo "Copying $SHA"
git cherry-pick -x "$SHA" -X theirs
git reset --soft HEAD~1
git add torch/distributed torch/csrc/distributed test/distributed test/cpp/c10d benchmarks/distributed
git checkout .
git commit --reuse-message=HEAD@{1}
git clean -f
done
if [[ "${WITH_PUSH}" == true ]]; then
git push
fi

View File

@ -51,8 +51,6 @@ def main() -> None:
for platform_image in platform_images: # type: ignore[attr-defined] for platform_image in platform_images: # type: ignore[attr-defined]
for arch in platform_image.keys(): # type: ignore[attr-defined] for arch in platform_image.keys(): # type: ignore[attr-defined]
if arch == "cpu-s390x":
continue
tag_image( tag_image(
platform_image[arch], # type: ignore[index] platform_image[arch], # type: ignore[index]
default_tag, default_tag,

View File

@ -1,237 +0,0 @@
from unittest import main, TestCase
from unittest.mock import Mock, patch
import runner_determinator as rd
class TestRunnerDeterminatorIssueParser(TestCase):
def test_parse_settings(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 25
otherExp:
rollout_perc: 0
---
Users:
@User1,lf
@User2,lf,otherExp
"""
settings = rd.parse_settings(settings_text)
self.assertTupleEqual(
rd.Experiment(rollout_perc=25),
settings.experiments["lf"],
"lf settings not parsed correctly",
)
self.assertTupleEqual(
rd.Experiment(rollout_perc=0),
settings.experiments["otherExp"],
"otherExp settings not parsed correctly",
)
def test_parse_settings_in_code_block(self) -> None:
settings_text = """
```
experiments:
lf:
rollout_perc: 25
otherExp:
rollout_perc: 0
```
---
Users:
@User1,lf
@User2,lf,otherExp
"""
settings = rd.parse_settings(settings_text)
self.assertTupleEqual(
rd.Experiment(rollout_perc=25),
settings.experiments["lf"],
"lf settings not parsed correctly",
)
self.assertTupleEqual(
rd.Experiment(rollout_perc=0),
settings.experiments["otherExp"],
"otherExp settings not parsed correctly",
)
def test_parse_users(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 0
otherExp:
rollout_perc: 0
---
Users:
@User1,lf
@User2,lf,otherExp
"""
users = rd.parse_users(settings_text)
self.assertDictEqual(
{"User1": ["lf"], "User2": ["lf", "otherExp"]},
users,
"Users not parsed correctly",
)
def test_parse_users_without_settings(self) -> None:
settings_text = """
@User1,lf
@User2,lf,otherExp
"""
users = rd.parse_users(settings_text)
self.assertDictEqual(
{"User1": ["lf"], "User2": ["lf", "otherExp"]},
users,
"Users not parsed correctly",
)
class TestRunnerDeterminatorGetRunnerPrefix(TestCase):
def test_opted_in_user(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 0
otherExp:
rollout_perc: 0
---
Users:
@User1,lf
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"])
self.assertEqual("lf.", prefix, "Runner prefix not correct for User1")
def test_opted_in_user_two_experiments(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 0
otherExp:
rollout_perc: 0
---
Users:
@User1,lf
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User2"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for User2")
@patch("random.uniform", return_value=50)
def test_opted_out_user(self, mock_uniform: Mock) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 25
otherExp:
rollout_perc: 25
---
Users:
@User1,lf
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User3"])
self.assertEqual("", prefix, "Runner prefix not correct for user")
@patch("random.uniform", return_value=10)
def test_opted_out_user_was_pulled_in_by_rollout(self, mock_uniform: Mock) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 25
otherExp:
rollout_perc: 25
---
Users:
@User1,lf
@User2,lf,otherExp
"""
# User3 is opted out, but is pulled into both experiments by the 10% rollout
prefix = rd.get_runner_prefix(settings_text, ["User3"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_lf_prefix_always_comes_first(self) -> None:
settings_text = """
experiments:
otherExp:
rollout_perc: 0
lf:
rollout_perc: 0
---
Users:
@User1,lf
@User2,otherExp,lf
"""
prefix = rd.get_runner_prefix(settings_text, ["User2"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
def test_ignores_commented_users(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 0
otherExp:
rollout_perc: 0
---
Users:
#@User1,lf
@User2,lf,otherExp
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"])
self.assertEqual("", prefix, "Runner prefix not correct for user")
def test_ignores_extra_experiments(self) -> None:
settings_text = """
experiments:
lf:
rollout_perc: 0
otherExp:
rollout_perc: 0
foo:
rollout_perc: 0
---
Users:
@User1,lf,otherExp,foo
"""
prefix = rd.get_runner_prefix(settings_text, ["User1"])
self.assertEqual("lf.otherExp.", prefix, "Runner prefix not correct for user")
if __name__ == "__main__":
main()

View File

@ -36,7 +36,6 @@ from warnings import warn
import yaml import yaml
from github_utils import ( from github_utils import (
gh_close_pr,
gh_fetch_json_list, gh_fetch_json_list,
gh_fetch_merge_base, gh_fetch_merge_base,
gh_fetch_url, gh_fetch_url,
@ -1175,11 +1174,11 @@ class GitHubPR:
for pr in additional_merged_prs: for pr in additional_merged_prs:
pr.add_numbered_label(MERGE_COMPLETE_LABEL, dry_run) pr.add_numbered_label(MERGE_COMPLETE_LABEL, dry_run)
# When the merge process reaches this part, we can assume that the commit
# has been successfully pushed to trunk
merge_commit_sha = repo.rev_parse(name=self.default_branch())
if comment_id and self.pr_num: if comment_id and self.pr_num:
# When the merge process reaches this part, we can assume that the commit
# has been successfully pushed to trunk
merge_commit_sha = repo.rev_parse(name=REMOTE_MAIN_BRANCH)
# Finally, upload the record to Rockset. The list of pending and failed # Finally, upload the record to Rockset. The list of pending and failed
# checks are at the time of the merge # checks are at the time of the merge
save_merge_record( save_merge_record(
@ -1204,17 +1203,6 @@ class GitHubPR:
else: else:
print("Missing comment ID or PR number, couldn't upload to Rockset") print("Missing comment ID or PR number, couldn't upload to Rockset")
# Usually Github will see that the commit has "resolves <pr_num>" in the
# commit message and close the PR, but sometimes it doesn't, leading to
# confusion. When it doesn't, we close it manually.
time.sleep(60) # Give Github some time to close the PR
manually_close_merged_pr(
pr=self,
additional_merged_prs=additional_merged_prs,
merge_commit_sha=merge_commit_sha,
dry_run=dry_run,
)
def merge_changes( def merge_changes(
self, self,
repo: GitRepo, repo: GitRepo,
@ -1515,34 +1503,6 @@ def checks_to_markdown_bullets(
] ]
def manually_close_merged_pr(
pr: GitHubPR,
additional_merged_prs: List[GitHubPR],
merge_commit_sha: str,
dry_run: bool,
) -> None:
def _comment_and_close(pr: GitHubPR, comment: str) -> None:
pr = GitHubPR(pr.org, pr.project, pr.pr_num) # Refresh the PR
if not pr.is_closed():
gh_post_pr_comment(pr.org, pr.project, pr.pr_num, comment, dry_run)
gh_close_pr(pr.org, pr.project, pr.pr_num, dry_run)
message = (
f"This PR (#{pr.pr_num}) was merged in {merge_commit_sha} but it is still open, likely due to a Github bug, "
"so mergebot is closing it manually. If you think this is a mistake, please feel free to reopen and contact Dev Infra."
)
_comment_and_close(pr, message)
for additional_pr in additional_merged_prs:
message = (
f"This PR (#{additional_pr.pr_num}) was merged as part of PR #{pr.pr_num} in the stack under {merge_commit_sha} "
"but it is still open, likely due to a Github bug, so mergebot is closing it manually. "
"If you think this is a mistake, please feel free to reopen and contact Dev Infra."
)
_comment_and_close(additional_pr, message)
print(f"PR {pr.pr_num} and all additional PRs in the stack have been closed.")
@retries_decorator() @retries_decorator()
def save_merge_record( def save_merge_record(
comment_id: int, comment_id: int,

View File

@ -1,7 +1,7 @@
{%- set upload_artifact_s3_action = "seemethere/upload-artifact-s3@v5" -%} {%- set upload_artifact_s3_action = "seemethere/upload-artifact-s3@v5" -%}
{%- set download_artifact_s3_action = "seemethere/download-artifact-s3@v4" -%} {%- set download_artifact_s3_action = "seemethere/download-artifact-s3@v4" -%}
{%- set upload_artifact_action = "actions/upload-artifact@v4.4.0" -%} {%- set upload_artifact_action = "actions/upload-artifact@v3" -%}
{%- set download_artifact_action = "actions/download-artifact@v4.1.7" -%} {%- set download_artifact_action = "actions/download-artifact@v3" -%}
{%- set timeout_minutes = 240 -%} {%- set timeout_minutes = 240 -%}

View File

@ -101,7 +101,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: !{{ config["build_name"] }} name: !{{ config["build_name"] }}

View File

@ -45,7 +45,7 @@
{%- if is_windows %} {%- if is_windows %}
# This is a dummy value for libtorch to work correctly with our batch scripts # This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason # without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.8"
{%- endif %} {%- endif %}
{%- else %} {%- else %}

View File

@ -104,9 +104,9 @@ jobs:
- get-label-type - get-label-type
{%- if config["gpu_arch_type"] == "cuda" %} {%- if config["gpu_arch_type"] == "cuda" %}
{%- if branches == "nightly" %} {%- if branches == "nightly" %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
{%- else %} {%- else %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge.nonephemeral" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu.nonephemeral"
{%- endif %} {%- endif %}
{%- else %} {%- else %}
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.4xlarge.nonephemeral"

View File

@ -283,7 +283,7 @@ jobs:
# Ensure the working directory gets chowned back to the current user # Ensure the working directory gets chowned back to the current user
docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" . docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }} if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
with: with:
name: ${{ inputs.build_name }} name: ${{ inputs.build_name }}

View File

@ -210,7 +210,7 @@ jobs:
- name: Download Build Artifacts - name: Download Build Artifacts
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }} if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: actions/download-artifact@v4.1.7 uses: actions/download-artifact@v3
with: with:
name: ${{ inputs.build_name }} name: ${{ inputs.build_name }}
path: "${{ runner.temp }}/artifacts/" path: "${{ runner.temp }}/artifacts/"

View File

@ -126,7 +126,7 @@ jobs:
# NB: When the previous build job is skipped, there won't be any artifacts and # NB: When the previous build job is skipped, there won't be any artifacts and
# this step will fail. Binary build jobs can only be skipped on CI, not nightly # this step will fail. Binary build jobs can only be skipped on CI, not nightly
continue-on-error: true continue-on-error: true
uses: actions/download-artifact@v4.1.7 uses: actions/download-artifact@v3
with: with:
name: ${{ inputs.build_name }} name: ${{ inputs.build_name }}
path: "${{ runner.temp }}/artifacts/" path: "${{ runner.temp }}/artifacts/"

View File

@ -292,7 +292,7 @@ jobs:
bundler-cache: true bundler-cache: true
- name: Download arm64 artifacts - name: Download arm64 artifacts
uses: actions/download-artifact@v4.1.7 uses: actions/download-artifact@v3
with: with:
name: pytorch-ios-build-artifacts-arm64 name: pytorch-ios-build-artifacts-arm64

View File

@ -109,7 +109,6 @@ jobs:
steps: steps:
- name: Setup SSH (Click me for login details) - name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main uses: pytorch/test-infra/.github/actions/setup-ssh@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with: with:
github-secret: ${{ secrets.GITHUB_TOKEN }} github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -119,16 +118,13 @@ jobs:
# checkout. In other cases you should prefer a local checkout. # checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch - name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
no-sudo: ${{ inputs.build-environment == 'linux-s390x-binary-manywheel' }}
- name: Setup Linux - name: Setup Linux
uses: ./.github/actions/setup-linux uses: ./.github/actions/setup-linux
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
- name: configure aws credentials - name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v3 uses: aws-actions/configure-aws-credentials@v3
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }} if: ${{ inputs.aws-role-to-assume != '' }}
with: with:
role-to-assume: ${{ inputs.aws-role-to-assume }} role-to-assume: ${{ inputs.aws-role-to-assume }}
role-session-name: gha-linux-build role-session-name: gha-linux-build
@ -137,13 +133,11 @@ jobs:
- name: Calculate docker image - name: Calculate docker image
id: calculate-docker-image id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with: with:
docker-image-name: ${{ inputs.docker-image-name }} docker-image-name: ${{ inputs.docker-image-name }}
- name: Use following to pull public copy of the image - name: Use following to pull public copy of the image
id: print-ghcr-mirror id: print-ghcr-mirror
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
env: env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash shell: bash
@ -153,7 +147,6 @@ jobs:
- name: Pull docker image - name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main uses: pytorch/test-infra/.github/actions/pull-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with: with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }} docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -181,7 +174,6 @@ jobs:
- name: Download pytest cache - name: Download pytest cache
uses: ./.github/actions/pytest-cache-download uses: ./.github/actions/pytest-cache-download
continue-on-error: true continue-on-error: true
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with: with:
cache_dir: .pytest_cache cache_dir: .pytest_cache
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }} job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
@ -203,7 +195,6 @@ jobs:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }} PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }} TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
DOCKER_IMAGE_S390X: ${{ inputs.docker-image-name }}
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }} XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
DEBUG: ${{ inputs.build-with-debug && '1' || '0' }} DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }} OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
@ -211,21 +202,7 @@ jobs:
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }} SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
USE_SPLIT_BUILD: ${{ inputs.use_split_build }} USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
run: | run: |
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
JENKINS_USER=
USED_IMAGE="${DOCKER_IMAGE_S390X}"
# since some steps are skipped on s390x, if they are necessary, run them here
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
else
JENKINS_USER="--user jenkins"
USED_IMAGE="${DOCKER_IMAGE}"
fi
# detached container should get cleaned up by teardown_ec2_linux # detached container should get cleaned up by teardown_ec2_linux
# Used for JENKINS_USER, which can be empty
# shellcheck disable=SC2086
container_name=$(docker run \ container_name=$(docker run \
-e BUILD_ENVIRONMENT \ -e BUILD_ENVIRONMENT \
-e MAX_JOBS="$(nproc --ignore=2)" \ -e MAX_JOBS="$(nproc --ignore=2)" \
@ -248,10 +225,10 @@ jobs:
--cap-add=SYS_PTRACE \ --cap-add=SYS_PTRACE \
--tty \ --tty \
--detach \ --detach \
${JENKINS_USER} \ --user jenkins \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \ -w /var/lib/jenkins/workspace \
"${USED_IMAGE}" "${DOCKER_IMAGE}"
) )
docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh' docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
@ -262,7 +239,7 @@ jobs:
- name: Store PyTorch Build Artifacts on S3 - name: Store PyTorch Build Artifacts on S3
uses: seemethere/upload-artifact-s3@v5 uses: seemethere/upload-artifact-s3@v5
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel' if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build
with: with:
name: ${{ inputs.build-environment }} name: ${{ inputs.build-environment }}
retention-days: 14 retention-days: 14
@ -272,7 +249,7 @@ jobs:
- name: Store PyTorch Build Artifacts on S3 for split build - name: Store PyTorch Build Artifacts on S3 for split build
uses: seemethere/upload-artifact-s3@v5 uses: seemethere/upload-artifact-s3@v5
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel' if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build
with: with:
name: ${{ inputs.build-environment }}-experimental-split-build name: ${{ inputs.build-environment }}-experimental-split-build
retention-days: 14 retention-days: 14
@ -280,26 +257,8 @@ jobs:
path: artifacts.zip path: artifacts.zip
s3-bucket: ${{ inputs.s3-bucket }} s3-bucket: ${{ inputs.s3-bucket }}
- name: Store PyTorch Build Artifacts for s390x
uses: actions/upload-artifact@v3
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}
retention-days: 14
if-no-files-found: error
path: artifacts.zip
- name: Store PyTorch Build Artifacts for s390x for split build
uses: actions/upload-artifact@v3
if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
with:
name: ${{ inputs.build-environment }}-experimental-split-build
retention-days: 14
if-no-files-found: error
path: artifacts.zip
- name: Upload sccache stats - name: Upload sccache stats
if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel' if: steps.build.outcome != 'skipped'
uses: seemethere/upload-artifact-s3@v5 uses: seemethere/upload-artifact-s3@v5
with: with:
s3-prefix: | s3-prefix: |
@ -311,13 +270,4 @@ jobs:
- name: Teardown Linux - name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel' if: always()
- name: Cleanup docker
if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
shell: bash
run: |
# on s390x stop the container for clean worker stop
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true

View File

@ -88,13 +88,6 @@ jobs:
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }} environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt pip-requirements-file: .github/requirements/pip-requirements-${{ runner.os }}.txt
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install PyTorch and run MPS tests - name: Install PyTorch and run MPS tests
id: test id: test
env: env:
@ -110,14 +103,6 @@ jobs:
NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }} NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }}
NO_TD: ${{ needs.filter.outputs.ci-no-td }} NO_TD: ${{ needs.filter.outputs.ci-no-td }}
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }} REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
run: | run: |
# shellcheck disable=SC1090 # shellcheck disable=SC1090
@ -159,6 +144,13 @@ jobs:
run: | run: |
cat test/**/*_toprint.log || true cat test/**/*_toprint.log || true
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Upload test artifacts - name: Upload test artifacts
uses: ./.github/actions/upload-test-artifacts uses: ./.github/actions/upload-test-artifacts
if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped' if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'

View File

@ -62,94 +62,49 @@ jobs:
""" """
This runner determinator is used to determine which set of runners to run a This runner determinator is used to determine which set of runners to run a
GitHub job on. It uses the first comment of a GitHub issue (by default GitHub job on. It uses the first comment of a GitHub issue (by default
https://github.com/pytorch/test-infra/issues/5132) to define the configuration https://github.com/pytorch/test-infra/issues/5132) as a user list to determine
of which runners should be used to run which job. which users will get their jobs to run on experimental runners. This user list
is also a comma separated list of additional features or experiments which the
The configuration has two parts, the settings and a list of opted-in users, user could be opted in to.
separated by a line containing "---". If that line is not present, the
settings are considered empty and only the second part, the user list, is
defined.
The first part is a YAML block that defines the rollout settings. This can be
used to define any settings that are needed to determine which runners to use.
Its fields are defined by the Settings class below.
The second part is a list of users who are explicitly opted in to the LF fleet.
The user list is also a comma-separated list of additional features or
experiments which the user can be opted in to.
The user list has the following rules: The user list has the following rules:
- Users are GitHub usernames, which must start with the @ prefix - Users are GitHub usernames with the @ prefix
- If the first line is a "*" then all users will use the new runners
- If the first line is a "!" then all users will use the old runners
- Each user is also a comma-separated list of features/experiments to enable - Each user is also a comma-separated list of features/experiments to enable
- A "#" prefix opts the user out of all experiments - A "#" prefix indicates the user is opted out of the new runners but is opting
into features/experiments.
Example config: Example user list:
# A list of experiments that can be opted into.
# This defines the behavior they'll induce when opted into.
# Expected syntax is:
# [experiment_name]: # Name of the experiment. Also used for the label prefix.
# rollout_perc: [int] # % of workflows to run with this experiment when users are not opted in.
experiments: @User1
lf: @User2,amz2023
rollout_perc: 25 #@UserOptOutOfNewRunner,amz2023
---
# Opt-ins:
# Users can opt into the LF fleet by adding their GitHub username to this list
# and specifying experiments to enable in a comma-separated list.
# Experiments should be from the above list.
@User1,lf,split_build
@User2,lf
@User3,split_build
""" """
import logging import logging
import os import os
import random
from argparse import ArgumentParser from argparse import ArgumentParser
from logging import LogRecord from logging import LogRecord
from typing import Any, Dict, Iterable, List, NamedTuple, Tuple from typing import Any, Iterable
import yaml
from github import Auth, Github from github import Auth, Github
from github.Issue import Issue from github.Issue import Issue
DEFAULT_LABEL_PREFIX = "" # use meta runners WORKFLOW_LABEL_META = "" # use meta runners
WORKFLOW_LABEL_LF = "lf." # use runners from the linux foundation WORKFLOW_LABEL_LF = "lf." # use runners from the linux foundation
WORKFLOW_LABEL_LF_CANARY = "lf.c." # use canary runners from the linux foundation WORKFLOW_LABEL_LF_CANARY = "lf.c." # use canary runners from the linux foundation
RUNNER_AMI_LEGACY = ""
RUNNER_AMI_AMZ2023 = "amz2023"
GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "") GITHUB_OUTPUT = os.getenv("GITHUB_OUTPUT", "")
GH_OUTPUT_KEY_AMI = "runner-ami" GH_OUTPUT_KEY_AMI = "runner-ami"
GH_OUTPUT_KEY_LABEL_TYPE = "label-type" GH_OUTPUT_KEY_LABEL_TYPE = "label-type"
SETTING_EXPERIMENTS = "experiments"
LF_FLEET_EXPERIMENT = "lf"
CANARY_FLEET_SUFFIX = ".c"
class Experiment(NamedTuple):
rollout_perc: float = (
0 # Percentage of workflows to experiment on when user is not opted-in.
)
# Add more fields as needed
class Settings(NamedTuple):
"""
Settings for the experiments that can be opted into.
"""
experiments: Dict[str, Experiment] = {}
class ColorFormatter(logging.Formatter): class ColorFormatter(logging.Formatter):
"""Color codes the log messages based on the log level""" """Color codes the log messages based on the log level"""
@ -241,14 +196,11 @@ jobs:
def get_potential_pr_author( def get_potential_pr_author(
github_token: str, repo: str, username: str, ref_type: str, ref_name: str gh: Github, repo: str, username: str, ref_type: str, ref_name: str
) -> str: ) -> str:
# If the trigger was a new tag added by a bot, this is a ciflow case # If the trigger was a new tag added by a bot, this is a ciflow case
# Fetch the actual username from the original PR. The PR number is # Fetch the actual username from the original PR. The PR number is
# embedded in the tag name: ciflow/<name>/<pr-number> # embedded in the tag name: ciflow/<name>/<pr-number>
gh = get_gh_client(github_token)
if username == "pytorch-bot[bot]" and ref_type == "tag": if username == "pytorch-bot[bot]" and ref_type == "tag":
split_tag = ref_name.split("/") split_tag = ref_name.split("/")
if ( if (
@ -270,238 +222,130 @@ jobs:
def is_exception_branch(branch: str) -> bool: def is_exception_branch(branch: str) -> bool:
"""
Branches that get opted out of all experiments and should always use Meta runners
"""
return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"} return branch.split("/")[0] in {"main", "nightly", "release", "landchecks"}
def load_yaml(yaml_text: str) -> Any: def get_workflow_type(issue: Issue, workflow_requestors: Iterable[str]) -> str:
try: try:
data = yaml.safe_load(yaml_text) first_comment = issue.get_comments()[0].body.strip("\n\t ")
return data
except yaml.YAMLError as exc:
log.exception("Error loading YAML")
raise
if first_comment[0] == "!":
def extract_settings_user_opt_in_from_text(rollout_state: str) -> Tuple[str, str]: log.info("LF Workflows are disabled for everyone. Using meta runners.")
""" return WORKFLOW_LABEL_META
Extracts the text with settings, if any, and the opted in users from the rollout state. elif first_comment[0] == "*":
log.info("LF Workflows are enabled for everyone. Using LF runners.")
If the issue body contains "---" then the text above that is the settings return WORKFLOW_LABEL_LF
and the text below is the list of opted in users. else:
all_opted_in_users = {
If it doesn't contain "---" then the settings are empty and the rest is the users. usr_raw.strip("\n\t@ ").split(",")[0]
""" for usr_raw in first_comment.split()
rollout_state_parts = rollout_state.split("---") }
if len(rollout_state_parts) >= 2: opted_in_requestors = {
return rollout_state_parts[0], rollout_state_parts[1] usr for usr in workflow_requestors if usr in all_opted_in_users
else: }
return "", rollout_state if opted_in_requestors:
class UserOptins(Dict[str, List[str]]):
"""
Dictionary of users with a list of features they have opted into
"""
def parse_user_opt_in_from_text(user_optin_text: str) -> UserOptins:
"""
Parse the user opt-in text into a mapping of username to the list of features they have opted into
Users are GitHub usernames with the @ prefix. Each user is also a comma-separated list of features/experiments to enable.
- Example line: "@User1,lf,split_build"
- A "#" prefix indicates the user is opted out of all experiments
"""
optins = UserOptins()
for user in user_optin_text.split("\n"):
user = user.strip("\r\n\t -")
if not user or not user.startswith("@"):
# Not a valid user. Skip
continue
if user:
usr_name = user.split(",")[0].strip("@")
optins[usr_name] = [exp.strip(" ") for exp in user.split(",")[1:]]
return optins
def parse_settings_from_text(settings_text: str) -> Settings:
"""
Parse the experiments from the issue body into a Settings object
"""
try:
if settings_text:
# Escape the backtick as well so that we can have the settings in a code block on the GH issue
# for easy reading
# Note: Using ascii for the backtick so that the cat step in _runner-determinator.yml doesn't choke on
# the backtick character in shell commands.
backtick = chr(96) # backtick character
settings_text = settings_text.strip(f"\r\n\t{backtick} ")
settings = load_yaml(settings_text)
# For now we just load experiments. We can expand this if/when we add more settings
experiments = {}
for exp_name, exp_settings in settings.get(SETTING_EXPERIMENTS).items():
valid_settings = {}
for setting in exp_settings:
if setting not in Experiment._fields:
log.warning(
f"Unexpected setting in experiment: {setting} = {exp_settings[setting]}"
)
else:
valid_settings[setting] = exp_settings[setting]
experiments[exp_name] = Experiment(**valid_settings)
return Settings(experiments)
except Exception:
log.exception("Failed to parse settings")
return Settings()
def parse_settings(rollout_state: str) -> Settings:
"""
Parse settings, if any, from the rollout state.
If the issue body contains "---" then the text above that is the settings
and the text below is the list of opted in users.
If it doesn't contain "---" then the settings are empty and the default values are used.
"""
settings_text, _ = extract_settings_user_opt_in_from_text(rollout_state)
return parse_settings_from_text(settings_text)
def parse_users(rollout_state: str) -> UserOptins:
"""
Parse users from the rollout state.
"""
_, users_text = extract_settings_user_opt_in_from_text(rollout_state)
return parse_user_opt_in_from_text(users_text)
def is_user_opted_in(user: str, user_optins: UserOptins, experiment_name: str) -> bool:
"""
Check if a user is opted into an experiment
"""
return experiment_name in user_optins.get(user, [])
def get_runner_prefix(
rollout_state: str, workflow_requestors: Iterable[str], is_canary: bool = False
) -> str:
settings = parse_settings(rollout_state)
user_optins = parse_users(rollout_state)
fleet_prefix = ""
prefixes = []
for experiment_name, experiment_settings in settings.experiments.items():
enabled = False
# Is any workflow_requestor opted in to this experiment?
opted_in_users = [
requestor
for requestor in workflow_requestors
if is_user_opted_in(requestor, user_optins, experiment_name)
]
if opted_in_users:
log.info(
f"{', '.join(opted_in_users)} have opted into experiment {experiment_name}."
)
enabled = True
elif experiment_settings.rollout_perc:
# If no user is opted in, then we randomly enable the experiment based on the rollout percentage
if random.uniform(0, 100) <= experiment_settings.rollout_perc:
log.info( log.info(
f"Based on rollout percentage of {experiment_settings.rollout_perc}%, enabling experiment {experiment_name}." f"LF Workflows are enabled for {', '.join(opted_in_requestors)}. Using LF runners."
) )
enabled = True return WORKFLOW_LABEL_LF
if enabled:
label = experiment_name
if experiment_name == LF_FLEET_EXPERIMENT:
# We give some special treatment to the "lf" experiment since it determines the fleet we use
# - If it's enabled, then we always list its prefix first
# - If we're in the canary branch, then we append ".c" to the lf prefix
if is_canary:
label += CANARY_FLEET_SUFFIX
fleet_prefix = label
else: else:
prefixes.append(label) log.info(
f"LF Workflows are disabled for {', '.join(workflow_requestors)}. Using meta runners."
)
return WORKFLOW_LABEL_META
if len(prefixes) > 1: except Exception as e:
log.error( log.error(
f"Only a fleet and one other experiment can be enabled for a job at any time. Enabling {prefixes[0]} and ignoring the rest, which are {', '.join(prefixes[1:])}" f"Failed to get determine workflow type. Falling back to meta runners. Exception: {e}"
) )
prefixes = prefixes[:1] return WORKFLOW_LABEL_META
# Fleet always comes first
if fleet_prefix:
prefixes.insert(0, fleet_prefix)
return ".".join(prefixes) + "." if prefixes else ""
def get_rollout_state_from_issue(github_token: str, repo: str, issue_num: int) -> str: def get_optin_feature(
""" issue: Issue, workflow_requestors: Iterable[str], feature: str, fallback: str
Gets the first comment of the issue, which contains the desired rollout state. ) -> str:
try:
first_comment = issue.get_comments()[0].body.strip("\n\t ")
userlist = {u.lstrip("#").strip("\n\t@ ") for u in first_comment.split()}
all_opted_in_users = set()
for user in userlist:
for i in user.split(","):
if i == feature:
all_opted_in_users.add(user.split(",")[0])
opted_in_requestors = {
usr for usr in workflow_requestors if usr in all_opted_in_users
}
The default issue we use - https://github.com/pytorch/test-infra/issues/5132 if opted_in_requestors:
""" log.info(
gh = get_gh_client(github_token) f"Feature {feature} is enabled for {', '.join(opted_in_requestors)}. Using feature {feature}."
issue = get_issue(gh, repo, issue_num) )
return str(issue.get_comments()[0].body.strip("\n\t ")) return feature
else:
log.info(
f"Feature {feature} is disabled for {', '.join(workflow_requestors)}. Using fallback \"{fallback}\"."
)
return fallback
except Exception as e:
log.error(
f'Failed to determine if user has opted-in to feature {feature}. Using fallback "{fallback}". Exception: {e}'
)
return fallback
def main() -> None: def main() -> None:
args = parse_args() args = parse_args()
if args.github_ref_type == "branch" and is_exception_branch(args.github_branch): if args.github_ref_type == "branch" and is_exception_branch(args.github_branch):
log.info( log.info(f"Exception branch: '{args.github_branch}', using meta runners")
f"Exception branch: '{args.github_branch}', using Meta runners and no experiments." label_type = WORKFLOW_LABEL_META
) runner_ami = RUNNER_AMI_LEGACY
runner_label_prefix = DEFAULT_LABEL_PREFIX
else: else:
try: try:
rollout_state = get_rollout_state_from_issue( gh = get_gh_client(args.github_token)
args.github_token, args.github_issue_repo, args.github_issue # The default issue we use - https://github.com/pytorch/test-infra/issues/5132
) issue = get_issue(gh, args.github_issue_repo, args.github_issue)
username = get_potential_pr_author( username = get_potential_pr_author(
args.github_token, gh,
args.github_repo, args.github_repo,
args.github_actor, args.github_actor,
args.github_ref_type, args.github_ref_type,
args.github_branch, args.github_branch,
) )
label_type = get_workflow_type(
is_canary = args.github_repo == "pytorch/pytorch-canary" issue,
(
runner_label_prefix = get_runner_prefix( args.github_issue_owner,
rollout_state, (args.github_issue_owner, username), is_canary username,
),
)
runner_ami = get_optin_feature(
issue=issue,
workflow_requestors=(
args.github_issue_owner,
username,
),
feature=RUNNER_AMI_AMZ2023,
fallback=RUNNER_AMI_LEGACY,
) )
except Exception as e: except Exception as e:
log.error( log.error(
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}" f"Failed to get issue. Falling back to meta runners. Exception: {e}"
) )
label_type = WORKFLOW_LABEL_META
runner_ami = RUNNER_AMI_LEGACY
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix) # For Canary builds use canary runners
if args.github_repo == "pytorch/pytorch-canary" and label_type == WORKFLOW_LABEL_LF:
label_type = WORKFLOW_LABEL_LF_CANARY
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, label_type)
set_github_output(GH_OUTPUT_KEY_AMI, runner_ami)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
EOF EOF
cat runner_determinator.py cat runner_determinator.py
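
Two pieces of get_runner_prefix above are worth seeing in isolation: the percentage rollout gate and the prefix assembly (fleet label first, ".c" suffix on canary). A minimal sketch under the same constants, omitting the script's cap of one non-fleet experiment per job:

import random

LF_FLEET_EXPERIMENT = "lf"
CANARY_FLEET_SUFFIX = ".c"

def rollout_gate(rollout_perc: float) -> bool:
    # A workflow with no opted-in requestor joins the experiment when the
    # uniform draw lands inside the rollout percentage.
    return random.uniform(0, 100) <= rollout_perc

def assemble_prefix(enabled_experiments, is_canary=False):
    fleet_prefix = ""
    prefixes = []
    for name in enabled_experiments:
        if name == LF_FLEET_EXPERIMENT:
            # The fleet experiment always leads; canary appends ".c".
            fleet_prefix = name + (CANARY_FLEET_SUFFIX if is_canary else "")
        else:
            prefixes.append(name)
    if fleet_prefix:
        prefixes.insert(0, fleet_prefix)
    return ".".join(prefixes) + "." if prefixes else ""

print(assemble_prefix(["otherExp", "lf"]))                    # lf.otherExp.
print(assemble_prefix(["otherExp", "lf"], is_canary=True))    # lf.c.otherExp.
print(sum(rollout_gate(25) for _ in range(10_000)) / 10_000)  # ~0.25 of runs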

View File

@ -11,16 +11,6 @@ on:
required: true required: true
type: string type: string
description: What CUDA version to build with, "cpu" for none. description: What CUDA version to build with, "cpu" for none.
use-xpu:
required: false
type: boolean
default: false
description: If set, build with XPU support.
vc-year:
required: false
type: string
default: "2019"
description: The Visual Studio year to use for building.
build-with-debug: build-with-debug:
required: false required: false
type: boolean type: boolean
@ -151,7 +141,7 @@ jobs:
SCCACHE_REGION: us-east-1 SCCACHE_REGION: us-east-1
VC_PRODUCT: "BuildTools" VC_PRODUCT: "BuildTools"
VC_VERSION: "" VC_VERSION: ""
VC_YEAR: "${{ inputs.vc-year }}" VC_YEAR: "2019"
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine" ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
AWS_DEFAULT_REGION: us-east-1 AWS_DEFAULT_REGION: us-east-1
PR_NUMBER: ${{ github.event.pull_request.number }} PR_NUMBER: ${{ github.event.pull_request.number }}
@ -159,7 +149,6 @@ jobs:
DEBUG: ${{ inputs.build-with-debug && '1' || '0' }} DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
TORCH_CUDA_ARCH_LIST: "8.6" TORCH_CUDA_ARCH_LIST: "8.6"
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }} USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
USE_XPU: ${{ inputs.use-xpu == true && '1' || '0' }}
OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }} OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
run: | run: |
.ci/pytorch/win-build.sh .ci/pytorch/win-build.sh

View File

@ -32,7 +32,7 @@ concurrency:
jobs: jobs:
build-docker: build-docker:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
runs-on: linux.9xlarge.ephemeral runs-on: am2.linux.9xlarge.ephemeral
strategy: strategy:
matrix: matrix:
cuda_version: ["11.8", "12.1", "12.4", "cpu"] cuda_version: ["11.8", "12.1", "12.4", "cpu"]

View File

@ -29,19 +29,9 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
get-label-type:
name: get-label-type
uses: ./.github/workflows/_runner-determinator.yml
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
build-docker-cuda: build-docker-cuda:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
cuda_version: ["12.4", "12.1", "11.8"] cuda_version: ["12.4", "12.1", "11.8"]
@ -76,8 +66,7 @@ jobs:
.ci/docker/libtorch/build.sh libtorch-cxx11-builder:cuda${{matrix.cuda_version}} .ci/docker/libtorch/build.sh libtorch-cxx11-builder:cuda${{matrix.cuda_version}}
build-docker-rocm: build-docker-rocm:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
rocm_version: ["6.1", "6.2"] rocm_version: ["6.1", "6.2"]
@ -112,8 +101,7 @@ jobs:
.ci/docker/libtorch/build.sh libtorch-cxx11-builder:rocm${{matrix.rocm_version}} .ci/docker/libtorch/build.sh libtorch-cxx11-builder:rocm${{matrix.rocm_version}}
build-docker-cpu: build-docker-cpu:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

View File

@ -33,19 +33,9 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
get-label-type:
name: get-label-type
uses: ./.github/workflows/_runner-determinator.yml
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
build-docker-cuda: build-docker-cuda:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: am2.linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
cuda_version: ["12.4", "12.1", "11.8"] cuda_version: ["12.4", "12.1", "11.8"]
@ -83,8 +73,7 @@ jobs:
# NOTE: manylinux_2_28 are still experimental, see https://github.com/pytorch/pytorch/issues/123649 # NOTE: manylinux_2_28 are still experimental, see https://github.com/pytorch/pytorch/issues/123649
build-docker-cuda-manylinux_2_28: build-docker-cuda-manylinux_2_28:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
cuda_version: ["12.4", "12.1", "11.8"] cuda_version: ["12.4", "12.1", "11.8"]
@ -121,8 +110,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinux2_28-builder:cuda${{matrix.cuda_version}} .ci/docker/manywheel/build.sh manylinux2_28-builder:cuda${{matrix.cuda_version}}
build-docker-cuda-aarch64: build-docker-cuda-aarch64:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.arm64.2xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
cuda_version: ["12.4"] cuda_version: ["12.4"]
@ -155,8 +143,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinuxaarch64-builder:cuda${{matrix.cuda_version}} .ci/docker/manywheel/build.sh manylinuxaarch64-builder:cuda${{matrix.cuda_version}}
build-docker-rocm: build-docker-rocm:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: am2.linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
strategy: strategy:
matrix: matrix:
rocm_version: ["6.1", "6.2"] rocm_version: ["6.1", "6.2"]
@ -191,8 +178,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinux-builder:rocm${{matrix.rocm_version}} .ci/docker/manywheel/build.sh manylinux-builder:rocm${{matrix.rocm_version}}
build-docker-cpu: build-docker-cpu:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: am2.linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
steps: steps:
- name: Checkout PyTorch - name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
@ -221,8 +207,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinux-builder:cpu .ci/docker/manywheel/build.sh manylinux-builder:cpu
build-docker-cpu-manylinux_2_28: build-docker-cpu-manylinux_2_28:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
env: env:
GPU_ARCH_TYPE: cpu-manylinux_2_28 GPU_ARCH_TYPE: cpu-manylinux_2_28
steps: steps:
@ -253,8 +238,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinux2_28-builder:cpu .ci/docker/manywheel/build.sh manylinux2_28-builder:cpu
build-docker-cpu-aarch64: build-docker-cpu-aarch64:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.arm64.2xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge.ephemeral"
env: env:
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
steps: steps:
@ -285,8 +269,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinuxaarch64-builder:cpu-aarch64 .ci/docker/manywheel/build.sh manylinuxaarch64-builder:cpu-aarch64
build-docker-cpu-aarch64-2_28: build-docker-cpu-aarch64-2_28:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.arm64.2xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.arm64.2xlarge.ephemeral"
env: env:
GPU_ARCH_TYPE: cpu-aarch64-2_28 GPU_ARCH_TYPE: cpu-aarch64-2_28
steps: steps:
@ -320,8 +303,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinux2_28_aarch64-builder:cpu-aarch64 .ci/docker/manywheel/build.sh manylinux2_28_aarch64-builder:cpu-aarch64
build-docker-cpu-cxx11-abi: build-docker-cpu-cxx11-abi:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
env: env:
GPU_ARCH_TYPE: cpu-cxx11-abi GPU_ARCH_TYPE: cpu-cxx11-abi
steps: steps:
@ -352,8 +334,7 @@ jobs:
.ci/docker/manywheel/build.sh manylinuxcxx11-abi-builder:cpu-cxx11-abi .ci/docker/manywheel/build.sh manylinuxcxx11-abi-builder:cpu-cxx11-abi
build-docker-xpu: build-docker-xpu:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type runs-on: linux.9xlarge.ephemeral
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.9xlarge.ephemeral"
env: env:
GPU_ARCH_TYPE: xpu GPU_ARCH_TYPE: xpu
steps: steps:

View File

@ -13,6 +13,7 @@ on:
- .github/scripts/build_triton_wheel.py - .github/scripts/build_triton_wheel.py
- .github/ci_commit_pins/triton.txt - .github/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton.txt - .ci/docker/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton-rocm.txt
- .ci/docker/ci_commit_pins/triton-xpu.txt - .ci/docker/ci_commit_pins/triton-xpu.txt
pull_request: pull_request:
paths: paths:
@ -20,6 +21,7 @@ on:
- .github/scripts/build_triton_wheel.py - .github/scripts/build_triton_wheel.py
- .github/ci_commit_pins/triton.txt - .github/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton.txt - .ci/docker/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton-rocm.txt
- .ci/docker/ci_commit_pins/triton-xpu.txt - .ci/docker/ci_commit_pins/triton-xpu.txt
concurrency: concurrency:
@ -27,19 +29,9 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
get-label-type:
name: get-label-type
uses: ./.github/workflows/_runner-determinator.yml
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
build-wheel: build-wheel:
name: "Build Triton Wheel" name: "Build Triton Wheel"
needs: get-label-type runs-on: [self-hosted, linux.2xlarge]
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge"
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
@ -128,7 +120,7 @@ jobs:
fi fi
docker exec -t "${container_name}" chown -R 1000.1000 /artifacts docker exec -t "${container_name}" chown -R 1000.1000 /artifacts
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
with: with:
name: pytorch-triton-wheel-${{ matrix.py_vers }}-${{ matrix.device }} name: pytorch-triton-wheel-${{ matrix.py_vers }}-${{ matrix.device }}
if-no-files-found: error if-no-files-found: error
@ -165,7 +157,7 @@ jobs:
aws-region: us-east-1 aws-region: us-east-1
- name: Download Build Artifacts - name: Download Build Artifacts
uses: actions/download-artifact@v4.1.7 uses: actions/download-artifact@v3
with: with:
# Download all available artifacts # Download all available artifacts
path: ${{ runner.temp }}/artifacts-all path: ${{ runner.temp }}/artifacts-all
@ -209,8 +201,7 @@ jobs:
build-conda: build-conda:
name: "Build Triton Conda" name: "Build Triton Conda"
needs: get-label-type runs-on: [self-hosted, linux.2xlarge]
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
@ -262,7 +253,7 @@ jobs:
docker exec -t "${container_name}" python /pytorch/.github/scripts/build_triton_wheel.py --build-conda --py-version="${PY_VERS}" $RELEASE docker exec -t "${container_name}" python /pytorch/.github/scripts/build_triton_wheel.py --build-conda --py-version="${PY_VERS}" $RELEASE
docker exec -t "${container_name}" chown -R 1000.1000 /artifacts docker exec -t "${container_name}" chown -R 1000.1000 /artifacts
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
with: with:
name: pytorch-triton-conda-${{ matrix.py_vers }} name: pytorch-triton-conda-${{ matrix.py_vers }}
if-no-files-found: error if-no-files-found: error
@ -282,7 +273,7 @@ jobs:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Download Build Artifacts - name: Download Build Artifacts
uses: actions/download-artifact@v4.1.7 uses: actions/download-artifact@v3
with: with:
# Download all available artifacts # Download all available artifacts
path: ${{ runner.temp }}/artifacts-all path: ${{ runner.temp }}/artifacts-all

View File

@ -30,9 +30,6 @@ concurrency:
jobs: jobs:
check-labels: check-labels:
permissions:
contents: read
pull-requests: write
name: Check labels name: Check labels
if: github.repository_owner == 'pytorch' if: github.repository_owner == 'pytorch'
runs-on: linux.20_04.4x runs-on: linux.20_04.4x

View File

@ -16,15 +16,6 @@ on:
paths: [.github/workflows/create_release.yml] paths: [.github/workflows/create_release.yml]
jobs: jobs:
get-label-type:
name: get-label-type
uses: ./.github/workflows/_runner-determinator.yml
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
release: release:
if: ${{ github.repository == 'pytorch/pytorch' }} if: ${{ github.repository == 'pytorch/pytorch' }}
name: Create Release name: Create Release
@ -72,7 +63,7 @@ jobs:
files: ${{env.PT_RELEASE_FILE}} files: ${{env.PT_RELEASE_FILE}}
- name: Upload source distribution to GHA artifacts for release tags - name: Upload source distribution to GHA artifacts for release tags
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }} if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
uses: actions/upload-artifact@v4.4.0 uses: actions/upload-artifact@v2
with: with:
name: ${{ env.PT_RELEASE_FILE }} name: ${{ env.PT_RELEASE_FILE }}
path: ${{ env.PT_RELEASE_FILE }} path: ${{ env.PT_RELEASE_FILE }}
@ -82,16 +73,14 @@ jobs:
upload_source_code_to_s3: upload_source_code_to_s3:
if: ${{ github.repository == 'pytorch/pytorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }} if: ${{ github.repository == 'pytorch/pytorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" runs-on: linux.2xlarge
environment: sourcecode-upload environment: sourcecode-upload
name: Upload source code to S3 for release tags name: Upload source code to S3 for release tags
permissions: permissions:
id-token: write id-token: write
needs: needs: release
- get-label-type
- release
steps: steps:
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v2
with: with:
name: ${{ needs.release.outputs.pt_release_name }} name: ${{ needs.release.outputs.pt_release_name }}
- name: Configure AWS credentials(PyTorch account) - name: Configure AWS credentials(PyTorch account)

View File

@ -30,18 +30,8 @@ env:
permissions: read-all permissions: read-all
jobs: jobs:
get-label-type:
name: get-label-type
uses: ./.github/workflows/_runner-determinator.yml
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
docker-build: docker-build:
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
needs: get-label-type
timeout-minutes: 240 timeout-minutes: 240
strategy: strategy:
fail-fast: false fail-fast: false
@ -55,15 +45,15 @@ jobs:
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9, pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9,
pytorch-linux-focal-py3.9-clang10, pytorch-linux-focal-py3.8-clang10,
pytorch-linux-focal-py3.11-clang10, pytorch-linux-focal-py3.11-clang10,
pytorch-linux-focal-py3.12-clang10, pytorch-linux-focal-py3.12-clang10,
pytorch-linux-focal-rocm-n-1-py3, pytorch-linux-focal-rocm-n-1-py3,
pytorch-linux-focal-rocm-n-py3, pytorch-linux-focal-rocm-n-py3,
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-clang12, pytorch-linux-jammy-cuda11.8-cudnn9-py3.8-clang12,
pytorch-linux-focal-py3-clang9-android-ndk-r21e, pytorch-linux-focal-py3-clang9-android-ndk-r21e,
pytorch-linux-jammy-py3.9-gcc11, pytorch-linux-jammy-py3.8-gcc11,
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks, pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks,
pytorch-linux-jammy-py3.12-halide, pytorch-linux-jammy-py3.12-halide,
pytorch-linux-jammy-xpu-2024.0-py3, pytorch-linux-jammy-xpu-2024.0-py3,
pytorch-linux-jammy-py3-clang15-asan, pytorch-linux-jammy-py3-clang15-asan,
@ -78,7 +68,7 @@ jobs:
- docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks - docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
runner: linux.arm64.m7g.4xlarge runner: linux.arm64.m7g.4xlarge
timeout-minutes: 600 timeout-minutes: 600
runs-on: "${{ needs.get-label-type.outputs.label-type }}${{ matrix.runner }}" runs-on: [self-hosted, "${{ matrix.runner }}"]
env: env:
DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${{ matrix.docker-image-name }} DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${{ matrix.docker-image-name }}
steps: steps:

View File

@ -34,19 +34,9 @@ env:
permissions: read-all permissions: read-all
jobs: jobs:
get-label-type:
name: get-label-type
uses: ./.github/workflows/_runner-determinator.yml
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
generate-matrix: generate-matrix:
if: github.repository_owner == 'pytorch' if: github.repository_owner == 'pytorch'
needs: get-label-type runs-on: [self-hosted, linux.large]
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.large"
outputs: outputs:
matrix: ${{ steps.generate-matrix.outputs.matrix }} matrix: ${{ steps.generate-matrix.outputs.matrix }}
steps: steps:
@ -64,12 +54,10 @@ jobs:
build: build:
if: ${{ github.repository == 'pytorch/pytorch' }} if: ${{ github.repository == 'pytorch/pytorch' }}
runs-on: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" runs-on: [self-hosted, linux.2xlarge]
environment: ${{ (github.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }} environment: ${{ (github.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
timeout-minutes: 240 timeout-minutes: 240
needs: needs: generate-matrix
- generate-matrix
- get-label-type
strategy: strategy:
matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }} matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
fail-fast: false fail-fast: false

View File

@ -58,7 +58,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -82,7 +81,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-aarch64 build_name: manywheel-py3_9-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel build_environment: linux-aarch64-binary-manywheel
@ -105,7 +103,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-aarch64 build_name: manywheel-py3_9-cpu-aarch64
secrets: secrets:
@ -128,7 +125,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -153,7 +149,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda-aarch64 build_name: manywheel-py3_9-cuda-aarch64
secrets: secrets:
@ -175,7 +170,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -199,7 +193,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-aarch64 build_name: manywheel-py3_10-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel build_environment: linux-aarch64-binary-manywheel
@ -222,7 +215,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-aarch64 build_name: manywheel-py3_10-cpu-aarch64
secrets: secrets:
@ -245,7 +237,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -270,7 +261,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cuda-aarch64 build_name: manywheel-py3_10-cuda-aarch64
secrets: secrets:
@ -292,7 +282,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -316,7 +305,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-aarch64 build_name: manywheel-py3_11-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel build_environment: linux-aarch64-binary-manywheel
@ -339,7 +327,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-aarch64 build_name: manywheel-py3_11-cpu-aarch64
secrets: secrets:
@ -362,7 +349,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -387,7 +373,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda-aarch64 build_name: manywheel-py3_11-cuda-aarch64
secrets: secrets:
@ -409,7 +394,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -433,7 +417,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-aarch64 build_name: manywheel-py3_12-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel build_environment: linux-aarch64-binary-manywheel
@ -456,7 +439,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64 GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-aarch64 build_name: manywheel-py3_12-cpu-aarch64
secrets: secrets:
@ -479,7 +461,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
runs_on: linux.arm64.m7g.4xlarge.ephemeral runs_on: linux.arm64.m7g.4xlarge.ephemeral
ALPINE_IMAGE: "arm64v8/alpine" ALPINE_IMAGE: "arm64v8/alpine"
@ -504,7 +485,6 @@ jobs:
GPU_ARCH_TYPE: cuda-aarch64 GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cuda-aarch64 build_name: manywheel-py3_12-cuda-aarch64
secrets: secrets:
@ -366,7 +366,7 @@ jobs:
steps: steps:
- name: Setup ROCm - name: Setup ROCm
uses: ./.github/actions/setup-rocm uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: libtorch-rocm6_1-shared-with-deps-cxx11-abi name: libtorch-rocm6_1-shared-with-deps-cxx11-abi
@ -476,7 +476,7 @@ jobs:
steps: steps:
- name: Setup ROCm - name: Setup ROCm
uses: ./.github/actions/setup-rocm uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: libtorch-rocm6_2-shared-with-deps-cxx11-abi name: libtorch-rocm6_2-shared-with-deps-cxx11-abi
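These libtorch ROCm test hunks differ only in the artifact action version: actions/download-artifact@v4.1.7 on one side of the compare, @v3 on the other. The two major versions are not interchangeable — a v4 download can only see artifacts produced by a v4 upload — so the build and test halves of a pipeline have to move together. A hedged sketch of a matched v4 pair (the artifact name is taken from the hunk above; the paths are illustrative):

    # build job
    - uses: actions/upload-artifact@v4.4.0
      if: always()
      with:
        name: libtorch-rocm6_2-shared-with-deps-cxx11-abi
        path: ${{ runner.temp }}/artifacts/  # illustrative path
    # test job
    - uses: actions/download-artifact@v4.1.7
      name: Download Build Artifacts
      with:
        name: libtorch-rocm6_2-shared-with-deps-cxx11-abi
        path: ${{ runner.temp }}/artifacts/  # illustrative path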
@ -366,7 +366,7 @@ jobs:
steps: steps:
- name: Setup ROCm - name: Setup ROCm
uses: ./.github/actions/setup-rocm uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: libtorch-rocm6_1-shared-with-deps-pre-cxx11 name: libtorch-rocm6_1-shared-with-deps-pre-cxx11
@ -476,7 +476,7 @@ jobs:
steps: steps:
- name: Setup ROCm - name: Setup ROCm
uses: ./.github/actions/setup-rocm uses: ./.github/actions/setup-rocm
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: libtorch-rocm6_2-shared-with-deps-pre-cxx11 name: libtorch-rocm6_2-shared-with-deps-pre-cxx11
@ -54,7 +54,6 @@ jobs:
GPU_ARCH_VERSION: 11.8 GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda11_8 build_name: manywheel-py3_9-cuda11_8
@ -78,7 +77,6 @@ jobs:
GPU_ARCH_VERSION: 11.8 GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda11_8 build_name: manywheel-py3_9-cuda11_8
build_environment: linux-binary-manywheel build_environment: linux-binary-manywheel
@ -87,6 +85,53 @@ jobs:
secrets: secrets:
github-token: ${{ secrets.GITHUB_TOKEN }} github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda11_8-split-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda11_8-split
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda11_8-split-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_9-cuda11_8-split-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda11_8-split
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
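PYTORCH_EXTRA_INSTALL_REQUIREMENTS packs PEP 508 requirement strings — each gated by platform_system/platform_machine environment markers — into one pipe-separated value. How the builder actually consumes it is outside this diff; the step below is a hypothetical sketch that simply splits on '|' and lets pip evaluate each marker (pip skips entries whose markers do not match the environment):

    - name: Install extra requirements (hypothetical consumer)
      shell: bash
      run: |
        # Split the pipe-separated list; each entry is a full PEP 508 specifier.
        IFS='|' read -ra REQS <<< "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS}"
        for req in "${REQS[@]}"; do
          # pip ignores an entry when its environment marker evaluates false.
          pip install "${req}"
        done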
manywheel-py3_9-cuda12_1-build: manywheel-py3_9-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml uses: ./.github/workflows/_binary-build-linux.yml
@ -101,7 +146,6 @@ jobs:
GPU_ARCH_VERSION: 12.1 GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_1 build_name: manywheel-py3_9-cuda12_1
@ -125,7 +169,6 @@ jobs:
GPU_ARCH_VERSION: 12.1 GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_1 build_name: manywheel-py3_9-cuda12_1
build_environment: linux-binary-manywheel build_environment: linux-binary-manywheel
@ -134,6 +177,53 @@ jobs:
secrets: secrets:
github-token: ${{ secrets.GITHUB_TOKEN }} github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_1-split-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_1-split
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_1-split-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_9-cuda12_1-split-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_1-split
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
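The -split-build/-split-test jobs are ordinary invocations of the same reusable workflows with use_split_build flipped to True. _binary-build-linux.yml itself is not part of this compare, so the following interface sketch is an assumption about how a boolean workflow_call input like this is typically declared, not the file's actual contents:

on:
  workflow_call:
    inputs:
      use_split_build:
        description: "Build libtorch and torch as split packages"  # assumed wording
        type: boolean
        required: false
        default: false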
manywheel-py3_9-cuda12_4-build: manywheel-py3_9-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }} if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml uses: ./.github/workflows/_binary-build-linux.yml
@ -148,7 +238,6 @@ jobs:
GPU_ARCH_VERSION: 12.4 GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_4 build_name: manywheel-py3_9-cuda12_4
@ -172,7 +261,6 @@ jobs:
GPU_ARCH_VERSION: 12.4 GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_4 build_name: manywheel-py3_9-cuda12_4
build_environment: linux-binary-manywheel build_environment: linux-binary-manywheel
@ -180,3 +268,50 @@ jobs:
runs_on: linux.4xlarge.nvidia.gpu runs_on: linux.4xlarge.nvidia.gpu
secrets: secrets:
github-token: ${{ secrets.GITHUB_TOKEN }} github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_4-split-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_4-split
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_4-split-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_9-cuda12_4-split-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_4-split
build_environment: linux-binary-manywheel
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -1,182 +0,0 @@
# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-binary-manywheel-split
on:
push:
branches:
- main
tags:
- 'ciflow/periodic/*'
workflow_dispatch:
env:
# Needed for conda builds
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
ANACONDA_USER: pytorch
AWS_DEFAULT_REGION: us-east-1
BINARY_ENV_FILE: /tmp/env
BUILD_ENVIRONMENT: linux-binary-manywheel-split
BUILDER_ROOT: /builder
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 0
concurrency:
group: linux-binary-manywheel-split-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
get-label-type:
name: get-label-type
uses: ./.github/workflows/_runner-determinator.yml
with:
triggering_actor: ${{ github.triggering_actor }}
issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
manywheel-py3_9-cuda11_8-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda11_8
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda11_8-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_9-cuda11_8-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda11_8
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_1
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_1-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_9-cuda12_1-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_1
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
needs: get-label-type
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.9"
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build_name: manywheel-py3_9-cuda12_4
build_environment: linux-binary-manywheel-split
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_9-cuda12_4-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs:
- manywheel-py3_9-cuda12_4-build
- get-label-type
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda12_4
build_environment: linux-binary-manywheel-split
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
@ -58,7 +58,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
runs_on: linux.s390x runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine" ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -82,7 +81,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-s390x build_name: manywheel-py3_9-cpu-s390x
build_environment: linux-s390x-binary-manywheel build_environment: linux-s390x-binary-manywheel
@ -105,7 +103,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-s390x build_name: manywheel-py3_9-cpu-s390x
secrets: secrets:
@ -127,7 +124,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
runs_on: linux.s390x runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine" ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -151,7 +147,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-s390x build_name: manywheel-py3_10-cpu-s390x
build_environment: linux-s390x-binary-manywheel build_environment: linux-s390x-binary-manywheel
@ -174,7 +169,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.10" DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-s390x build_name: manywheel-py3_10-cpu-s390x
secrets: secrets:
@ -196,7 +190,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
runs_on: linux.s390x runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine" ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -220,7 +213,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-s390x build_name: manywheel-py3_11-cpu-s390x
build_environment: linux-s390x-binary-manywheel build_environment: linux-s390x-binary-manywheel
@ -243,7 +235,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.11" DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-s390x build_name: manywheel-py3_11-cpu-s390x
secrets: secrets:
@ -265,7 +256,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
runs_on: linux.s390x runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine" ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -289,7 +279,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-s390x build_name: manywheel-py3_12-cpu-s390x
build_environment: linux-s390x-binary-manywheel build_environment: linux-s390x-binary-manywheel
@ -312,7 +301,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.12" DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-s390x build_name: manywheel-py3_12-cpu-s390x
secrets: secrets:
@ -334,7 +322,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.13" DESIRED_PYTHON: "3.13"
runs_on: linux.s390x runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine" ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -358,7 +345,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.13" DESIRED_PYTHON: "3.13"
build_name: manywheel-py3_13-cpu-s390x build_name: manywheel-py3_13-cpu-s390x
build_environment: linux-s390x-binary-manywheel build_environment: linux-s390x-binary-manywheel
@ -381,7 +367,6 @@ jobs:
DESIRED_CUDA: cpu DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
use_split_build: False
DESIRED_PYTHON: "3.13" DESIRED_PYTHON: "3.13"
build_name: manywheel-py3_13-cpu-s390x build_name: manywheel-py3_13-cpu-s390x
secrets: secrets:
@ -117,7 +117,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_9-cpu name: conda-py3_9-cpu
@ -232,7 +232,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_10-cpu name: conda-py3_10-cpu
@ -347,7 +347,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_11-cpu name: conda-py3_11-cpu
@ -462,7 +462,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_12-cpu name: conda-py3_12-cpu
@ -49,7 +49,7 @@ jobs:
DESIRED_DEVTOOLSET: cxx11-abi DESIRED_DEVTOOLSET: cxx11-abi
# This is a dummy value for libtorch to work correctly with our batch scripts # This is a dummy value for libtorch to work correctly with our batch scripts
# without this value pip does not get installed for some reason # without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.9" DESIRED_PYTHON: "3.8"
steps: steps:
# NOTE: These environment variables are put here so that they can be applied on every job equally # NOTE: These environment variables are put here so that they can be applied on every job equally
# They are also here because setting them at a workflow level doesn't give us access to the # They are also here because setting them at a workflow level doesn't give us access to the
@ -121,7 +121,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: libtorch-cpu-shared-with-deps-cxx11-abi name: libtorch-cpu-shared-with-deps-cxx11-abi
@ -118,7 +118,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: wheel-py3_9-cpu name: wheel-py3_9-cpu
@ -234,7 +234,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: wheel-py3_10-cpu name: wheel-py3_10-cpu
@ -350,7 +350,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: wheel-py3_11-cpu name: wheel-py3_11-cpu
@ -466,7 +466,7 @@ jobs:
# shellcheck disable=SC1091 # shellcheck disable=SC1091
source "${RUNNER_TEMP}/anaconda/bin/activate" source "${RUNNER_TEMP}/anaconda/bin/activate"
"${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_macos_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: wheel-py3_12-cpu name: wheel-py3_12-cpu
@ -132,7 +132,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_9-cpu name: conda-py3_9-cpu
@ -213,7 +213,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_9-cpu name: conda-py3_9-cpu
@ -378,7 +378,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_9-cuda11_8 name: conda-py3_9-cuda11_8
@ -403,7 +403,7 @@ jobs:
needs: needs:
- conda-py3_9-cuda11_8-build - conda-py3_9-cuda11_8-build
- get-label-type - get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
timeout-minutes: 240 timeout-minutes: 240
env: env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -460,7 +460,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_9-cuda11_8 name: conda-py3_9-cuda11_8
@ -626,7 +626,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_9-cuda12_1 name: conda-py3_9-cuda12_1
@ -651,7 +651,7 @@ jobs:
needs: needs:
- conda-py3_9-cuda12_1-build - conda-py3_9-cuda12_1-build
- get-label-type - get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
timeout-minutes: 240 timeout-minutes: 240
env: env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -708,7 +708,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_9-cuda12_1 name: conda-py3_9-cuda12_1
@ -874,7 +874,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_9-cuda12_4 name: conda-py3_9-cuda12_4
@ -899,7 +899,7 @@ jobs:
needs: needs:
- conda-py3_9-cuda12_4-build - conda-py3_9-cuda12_4-build
- get-label-type - get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
timeout-minutes: 240 timeout-minutes: 240
env: env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -956,7 +956,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_9-cuda12_4 name: conda-py3_9-cuda12_4
@ -1121,7 +1121,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_10-cpu name: conda-py3_10-cpu
@ -1202,7 +1202,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_10-cpu name: conda-py3_10-cpu
@ -1367,7 +1367,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_10-cuda11_8 name: conda-py3_10-cuda11_8
@ -1392,7 +1392,7 @@ jobs:
needs: needs:
- conda-py3_10-cuda11_8-build - conda-py3_10-cuda11_8-build
- get-label-type - get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
timeout-minutes: 240 timeout-minutes: 240
env: env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -1449,7 +1449,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_10-cuda11_8 name: conda-py3_10-cuda11_8
@ -1615,7 +1615,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_10-cuda12_1 name: conda-py3_10-cuda12_1
@ -1640,7 +1640,7 @@ jobs:
needs: needs:
- conda-py3_10-cuda12_1-build - conda-py3_10-cuda12_1-build
- get-label-type - get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
timeout-minutes: 240 timeout-minutes: 240
env: env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -1697,7 +1697,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_10-cuda12_1 name: conda-py3_10-cuda12_1
@ -1863,7 +1863,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_10-cuda12_4 name: conda-py3_10-cuda12_4
@ -1888,7 +1888,7 @@ jobs:
needs: needs:
- conda-py3_10-cuda12_4-build - conda-py3_10-cuda12_4-build
- get-label-type - get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
timeout-minutes: 240 timeout-minutes: 240
env: env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -1945,7 +1945,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_10-cuda12_4 name: conda-py3_10-cuda12_4
@ -2110,7 +2110,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_11-cpu name: conda-py3_11-cpu
@ -2191,7 +2191,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
name: Download Build Artifacts name: Download Build Artifacts
with: with:
name: conda-py3_11-cpu name: conda-py3_11-cpu
@ -2356,7 +2356,7 @@ jobs:
shell: bash shell: bash
run: | run: |
"${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh" "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- uses: actions/upload-artifact@v4.4.0 - uses: actions/upload-artifact@v3
if: always() if: always()
with: with:
name: conda-py3_11-cuda11_8 name: conda-py3_11-cuda11_8
@ -2381,7 +2381,7 @@ jobs:
needs: needs:
- conda-py3_11-cuda11_8-build - conda-py3_11-cuda11_8-build
- get-label-type - get-label-type
runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge" runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
timeout-minutes: 240 timeout-minutes: 240
env: env:
PYTORCH_ROOT: ${{ github.workspace }}/pytorch PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@ -2438,7 +2438,7 @@ jobs:
echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}" echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}" echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- uses: actions/download-artifact@v4.1.7 - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: conda-py3_11-cuda11_8
@@ -2604,7 +2604,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: conda-py3_11-cuda12_1
@@ -2629,7 +2629,7 @@ jobs:
 needs:
 - conda-py3_11-cuda12_1-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -2686,7 +2686,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: conda-py3_11-cuda12_1
@@ -2852,7 +2852,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: conda-py3_11-cuda12_4
@@ -2877,7 +2877,7 @@ jobs:
 needs:
 - conda-py3_11-cuda12_4-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -2934,7 +2934,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: conda-py3_11-cuda12_4
@@ -3099,7 +3099,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: conda-py3_12-cpu
@@ -3180,7 +3180,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: conda-py3_12-cpu
@@ -3345,7 +3345,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: conda-py3_12-cuda11_8
@@ -3370,7 +3370,7 @@ jobs:
 needs:
 - conda-py3_12-cuda11_8-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -3427,7 +3427,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: conda-py3_12-cuda11_8
@@ -3593,7 +3593,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: conda-py3_12-cuda12_1
@@ -3618,7 +3618,7 @@ jobs:
 needs:
 - conda-py3_12-cuda12_1-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -3675,7 +3675,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: conda-py3_12-cuda12_1
@@ -3841,7 +3841,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: conda-py3_12-cuda12_4
@@ -3866,7 +3866,7 @@ jobs:
 needs:
 - conda-py3_12-cuda12_4-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -3923,7 +3923,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: conda-py3_12-cuda12_4
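A note on the pattern above: every one of these hunks swaps actions/upload-artifact and actions/download-artifact together, and that coupling is load-bearing. Artifacts written by the v4 backend cannot be read back with download-artifact@v3 (and vice versa), so the two actions have to move between major versions in lockstep. A minimal sketch of the paired usage, where the job and artifact names are hypothetical rather than taken from the workflows above:

name: artifact-version-pairing
on: workflow_dispatch
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - run: echo "payload" > out.txt
      # The upload major version must match the download major version below.
      - uses: actions/upload-artifact@v4.4.0
        with:
          name: example-binary
          path: out.txt
  test:
    needs: build
    runs-on: ubuntu-latest
    steps:
      # download-artifact@v3 here would fail to find a v4-uploaded artifact.
      - uses: actions/download-artifact@v4.1.7
        with:
          name: example-binary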

View File

@@ -51,7 +51,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -129,7 +129,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cpu-shared-with-deps-debug
@@ -169,7 +169,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -214,7 +214,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cpu-shared-with-deps-debug

View File

@@ -58,7 +58,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -136,7 +136,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cpu-shared-with-deps-debug
@@ -176,7 +176,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -221,7 +221,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cpu-shared-with-deps-debug
@@ -290,7 +290,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cpu-shared-with-deps-debug
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -316,7 +316,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -394,7 +394,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cuda11_8-shared-with-deps-debug
@@ -419,7 +419,7 @@ jobs:
 needs:
 - libtorch-cuda11_8-shared-with-deps-debug-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -435,7 +435,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -480,7 +480,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cuda11_8-shared-with-deps-debug
@@ -550,7 +550,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cuda11_8-shared-with-deps-debug
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -576,7 +576,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -654,7 +654,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cuda12_1-shared-with-deps-debug
@@ -679,7 +679,7 @@ jobs:
 needs:
 - libtorch-cuda12_1-shared-with-deps-debug-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -695,7 +695,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -740,7 +740,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cuda12_1-shared-with-deps-debug
@@ -810,7 +810,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cuda12_1-shared-with-deps-debug
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -836,7 +836,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -914,7 +914,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cuda12_4-shared-with-deps-debug
@@ -939,7 +939,7 @@ jobs:
 needs:
 - libtorch-cuda12_4-shared-with-deps-debug-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -955,7 +955,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -1000,7 +1000,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cuda12_4-shared-with-deps-debug
@@ -1070,7 +1070,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cuda12_4-shared-with-deps-debug
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
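The DESIRED_PYTHON edit that repeats through these libtorch jobs does not change what the C++ packages are built against; as the in-file comment says, the value exists only so the Windows batch scripts can bootstrap pip. A plausible reading of the diff is that the dummy simply tracks the oldest CPython each branch supports (3.8 on one side, 3.9 on the other). A sketch of the env block these hunks keep touching, with values assumed rather than copied from any single job:

env:
  PYTORCH_ROOT: ${{ github.workspace }}/pytorch
  LIBTORCH_CONFIG: debug
  LIBTORCH_VARIANT: shared-with-deps
  # Dummy interpreter version so the batch scripts can install pip;
  # it is not the Python the produced libtorch archive targets.
  DESIRED_PYTHON: "3.9"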

View File

@@ -51,7 +51,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -129,7 +129,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cpu-shared-with-deps-release
@@ -169,7 +169,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -214,7 +214,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cpu-shared-with-deps-release

View File

@@ -58,7 +58,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -136,7 +136,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cpu-shared-with-deps-release
@@ -176,7 +176,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -221,7 +221,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cpu-shared-with-deps-release
@@ -290,7 +290,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cpu-shared-with-deps-release
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -316,7 +316,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -394,7 +394,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cuda11_8-shared-with-deps-release
@@ -419,7 +419,7 @@ jobs:
 needs:
 - libtorch-cuda11_8-shared-with-deps-release-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -435,7 +435,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -480,7 +480,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cuda11_8-shared-with-deps-release
@@ -550,7 +550,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cuda11_8-shared-with-deps-release
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -576,7 +576,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -654,7 +654,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cuda12_1-shared-with-deps-release
@@ -679,7 +679,7 @@ jobs:
 needs:
 - libtorch-cuda12_1-shared-with-deps-release-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -695,7 +695,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -740,7 +740,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cuda12_1-shared-with-deps-release
@@ -810,7 +810,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cuda12_1-shared-with-deps-release
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -836,7 +836,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -914,7 +914,7 @@ jobs:
 shell: bash
 run: |
 "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
- - uses: actions/upload-artifact@v4.4.0
+ - uses: actions/upload-artifact@v3
 if: always()
 with:
 name: libtorch-cuda12_4-shared-with-deps-release
@@ -939,7 +939,7 @@ jobs:
 needs:
 - libtorch-cuda12_4-shared-with-deps-release-build
 - get-label-type
- runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"
+ runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.8xlarge.nvidia.gpu"
 timeout-minutes: 240
 env:
 PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -955,7 +955,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 steps:
 - name: Display EC2 information
 shell: bash
@@ -1000,7 +1000,7 @@ jobs:
 echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
 echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
 echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
- - uses: actions/download-artifact@v4.1.7
+ - uses: actions/download-artifact@v3
 name: Download Build Artifacts
 with:
 name: libtorch-cuda12_4-shared-with-deps-release
@@ -1070,7 +1070,7 @@ jobs:
 LIBTORCH_VARIANT: shared-with-deps
 # This is a dummy value for libtorch to work correctly with our batch scripts
 # without this value pip does not get installed for some reason
- DESIRED_PYTHON: "3.9"
+ DESIRED_PYTHON: "3.8"
 build_name: libtorch-cuda12_4-shared-with-deps-release
 secrets:
 github-token: ${{ secrets.GITHUB_TOKEN }}
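Note the shape of the recurring runs-on edit above: the GPU fleet label changes (windows.g4dn.xlarge on one side, windows.8xlarge.nvidia.gpu on the other), but in both versions the final label is assembled by string-concatenating a prefix computed by the get-label-type job. A sketch, where the empty-vs-"lf." prefix values are an assumption about what the determinator emits rather than something shown in this diff:

jobs:
  test:
    needs:
      - build
      - get-label-type
    # outputs.label-type is "" for the default fleet or a prefix such as "lf."
    # for an alternate one; it is glued directly onto the runner label.
    runs-on: "${{ needs.get-label-type.outputs.label-type }}windows.g4dn.xlarge"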

File diff suppressed because it is too large

View File

@@ -18,22 +18,11 @@ concurrency:
 permissions: read-all
 jobs:
- get-label-type:
- name: get-label-type
- uses: ./.github/workflows/_runner-determinator.yml
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
 linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
 # Should be synced with the one in inductor.yml, but this doesn't run inductor_timm
 name: cuda12.4-py3.10-gcc9-sm86
 uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
 with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
 build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
 docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
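The hunk above (and the matching ones in the workflows that follow) deletes the same plumbing each time: a get-label-type job that calls the reusable _runner-determinator.yml workflow, plus the needs: edge and runner_prefix input that forward its output. The general pattern, reconstructed as a sketch (the inputs and the output name follow the diff; the selection logic inside is invented for illustration):

# .github/workflows/_runner-determinator.yml (sketch)
on:
  workflow_call:
    inputs:
      triggering_actor:
        type: string
        required: true
    outputs:
      label-type:
        value: ${{ jobs.determine.outputs.label-type }}
jobs:
  determine:
    runs-on: ubuntu-latest
    outputs:
      label-type: ${{ steps.pick.outputs.label-type }}
    steps:
      - id: pick
        # Hypothetical rule: everyone gets the default (empty) prefix.
        run: echo "label-type=" >> "$GITHUB_OUTPUT"

Callers then thread the output through as runner_prefix: "${{ needs.get-label-type.outputs.label-type }}", which is exactly the line each of these hunks removes.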

View File

@@ -1,40 +0,0 @@
- name: inductor-micro-benchmark-x86
- on:
- schedule:
- - cron: 0 7 * * *
- push:
- tags:
- - ciflow/inductor-micro-benchmark-cpu-x86/*
- workflow_dispatch:
- concurrency:
- group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
- cancel-in-progress: true
- permissions: read-all
- jobs:
- linux-jammy-cpu-py3_9-gcc11-inductor-build:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
- uses: ./.github/workflows/_linux-build.yml
- with:
- build-environment: linux-jammy-py3.9-gcc11
- docker-image-name: pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
- # Use metal host for benchmark jobs
- test-matrix: |
- { include: [
- { config: "inductor-micro-benchmark-cpu-x86", shard: 1, num_shards: 1, runner: "linux.24xl.spr-metal" },
- ]}
- linux-jammy-cpu-py3_9-gcc11-inductor-micro-benchmark-test:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
- uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
- with:
- build-environment: linux-jammy-py3.9-gcc11
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
- use-gha: anything-non-empty-to-use-gha
- timeout-minutes: 720
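One detail in the deleted workflow above worth a gloss: its concurrency group leans on the standard Actions-expressions idiom for a conditional, since the expression language has no ternary operator. A && B || C stands in for "if A then B else C" (and misbehaves when B is falsy, which is why the original only uses the && half). A stripped-down sketch of a cancellation group using the full form, with a hypothetical grouping scheme:

concurrency:
  # Per-commit group on branch pushes, per-ref group otherwise.
  group: ${{ github.workflow }}-${{ github.ref_type == 'branch' && github.sha || github.ref_name }}
  cancel-in-progress: true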

View File

@@ -16,21 +16,10 @@ concurrency:
 permissions: read-all
 jobs:
- get-label-type:
- name: get-label-type
- uses: ./.github/workflows/_runner-determinator.yml
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
 linux-focal-cuda12_1-py3_10-gcc9-inductor-micro-benchmark-build:
 name: cuda12.1-py3.10-gcc9-sm80
 uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
 with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
 docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
 cuda-arch-list: '8.0'

View File

@@ -13,21 +13,10 @@ concurrency:
 permissions: read-all
 jobs:
- get-label-type:
- name: get-label-type
- uses: ./.github/workflows/_runner-determinator.yml
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
 linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
 name: cuda12.1-py3.10-gcc9-sm80
 uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
 with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
 docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
 cuda-arch-list: '8.0'

View File

@@ -68,21 +68,10 @@ concurrency:
 permissions: read-all
 jobs:
- get-label-type:
- name: get-label-type
- uses: ./.github/workflows/_runner-determinator.yml
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
 linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
 name: cuda12.1-py3.10-gcc9-sm80
 uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
 with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
 docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
 cuda-arch-list: '8.0'

View File

@@ -5,7 +5,9 @@ on:
 # - cron: 0 7 * * 1-6
 # - cron: 0 7 * * 0
 # Does not perform max_autotune on CPU, so skip the weekly run setup
- - cron: 0 7 * * *
+ # Run 6 times everyday to see if perf instablity can be reproduced
+ # Will change this back
+ - cron: 0 */4 * * *
 # NB: GitHub has an upper limit of 10 inputs here
 workflow_dispatch:
 inputs:
@@ -48,21 +50,10 @@ concurrency:
 permissions: read-all
 jobs:
- get-label-type:
- name: get-label-type
- uses: ./.github/workflows/_runner-determinator.yml
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
 linux-jammy-aarch64-py3_10-inductor-build:
 name: linux-jammy-aarch64-py3.10-inductor
 uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
 with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 runner: linux.arm64.m7g.4xlarge
 build-environment: linux-jammy-aarch64-py3.10
 docker-image-name: pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks
@@ -114,7 +105,7 @@ jobs:
 name: linux-jammy-aarch64-py3.10-inductor
 uses: ./.github/workflows/_linux-test.yml
 needs: linux-jammy-aarch64-py3_10-inductor-build
- if: github.event.schedule == '0 7 * * *'
+ if: github.event.schedule == '0 */4 * * *'
 with:
 build-environment: linux-jammy-aarch64-py3.10
 # Turn off dynamic-shapes and aotinductor tests for now, to have faster iteration for debugging perf instability.
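Two things to note in the hunks above: 0 */4 * * * fires at minute 0 of every fourth hour, i.e. the six daily runs the new comment asks for, and the downstream test job is gated on the literal schedule string, so the if: condition has to be edited in lockstep with the cron entry. A minimal sketch of that pairing (job name and step are hypothetical):

on:
  schedule:
    - cron: '0 */4 * * *'
jobs:
  perf-test:
    runs-on: ubuntu-latest
    # github.event.schedule carries the cron string that triggered the run;
    # a stale literal here would make the job silently skip forever.
    if: ${{ github.event.schedule == '0 */4 * * *' }}
    steps:
      - run: echo "scheduled perf run"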

View File

@@ -48,23 +48,12 @@ concurrency:
 permissions: read-all
 jobs:
- get-label-type:
- name: get-label-type
- uses: ./.github/workflows/_runner-determinator.yml
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
- linux-jammy-cpu-py3_9-gcc11-inductor-build:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ linux-jammy-cpu-py3_8-gcc11-inductor-build:
+ name: linux-jammy-cpu-py3.8-gcc11-inductor
 uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
 with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
- build-environment: linux-jammy-py3.9-gcc11-build
- docker-image-name: pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks
+ build-environment: linux-jammy-py3.8-gcc11-build
+ docker-image-name: pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks
 test-matrix: |
 { include: [
 { config: "inductor_huggingface_perf_cpu_x86", shard: 1, num_shards: 3, runner: "linux.24xl.spr-metal" },
@@ -85,32 +74,32 @@ jobs:
 HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- linux-jammy-cpu-py3_9-gcc11-inductor-test-nightly:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ linux-jammy-cpu-py3_8-gcc11-inductor-test-nightly:
+ name: linux-jammy-cpu-py3.8-gcc11-inductor
 uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
+ needs: linux-jammy-cpu-py3_8-gcc11-inductor-build
 if: github.event.schedule == '0 7 * * *'
 with:
- build-environment: linux-jammy-py3.9-gcc11-build
+ build-environment: linux-jammy-py3.8-gcc11-build
 dashboard-tag: training-false-inference-true-default-true-dynamic-true-aotinductor-true
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.linux-jammy-cpu-py3_8-gcc11-inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-cpu-py3_8-gcc11-inductor-build.outputs.test-matrix }}
 use-gha: anything-non-empty-to-use-gha
 timeout-minutes: 720
 secrets:
 HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- linux-jammy-cpu-py3_9-gcc11-inductor-test:
- name: linux-jammy-cpu-py3.9-gcc11-inductor
+ linux-jammy-cpu-py3_8-gcc11-inductor-test:
+ name: linux-jammy-cpu-py3.8-gcc11-inductor
 uses: ./.github/workflows/_linux-test.yml
- needs: linux-jammy-cpu-py3_9-gcc11-inductor-build
+ needs: linux-jammy-cpu-py3_8-gcc11-inductor-build
 if: github.event_name == 'workflow_dispatch'
 with:
- build-environment: linux-jammy-py3.9-gcc11-build
+ build-environment: linux-jammy-py3.8-gcc11-build
 dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-aotinductor-${{ inputs.aotinductor }}
- docker-image: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.docker-image }}
- test-matrix: ${{ needs.linux-jammy-cpu-py3_9-gcc11-inductor-build.outputs.test-matrix }}
+ docker-image: ${{ needs.linux-jammy-cpu-py3_8-gcc11-inductor-build.outputs.docker-image }}
+ test-matrix: ${{ needs.linux-jammy-cpu-py3_8-gcc11-inductor-build.outputs.test-matrix }}
 use-gha: anything-non-empty-to-use-gha
 timeout-minutes: 720
 secrets:
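The py3_9/py3_8 rename above shows how far a job key propagates: the key itself, the name, the needs: entry of every dependent job, and each needs.<key>.outputs.* expression all have to change together, or the workflow either fails validation or silently evaluates an output to an empty string. A minimal sketch with hypothetical job names:

jobs:
  build-py3_8:
    uses: ./.github/workflows/_linux-build.yml
    with:
      build-environment: linux-jammy-py3.8-gcc11-build
  test-py3_8:
    uses: ./.github/workflows/_linux-test.yml
    needs: build-py3_8   # must reference the renamed key...
    with:
      # ...as must every expression that reads its outputs:
      docker-image: ${{ needs.build-py3_8.outputs.docker-image }}
      test-matrix: ${{ needs.build-py3_8.outputs.test-matrix }}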

View File

@@ -66,21 +66,10 @@ concurrency:
 permissions: read-all
 jobs:
- get-label-type:
- name: get-label-type
- uses: ./.github/workflows/_runner-determinator.yml
- with:
- triggering_actor: ${{ github.triggering_actor }}
- issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }}
- curr_branch: ${{ github.head_ref || github.ref_name }}
- curr_ref_type: ${{ github.ref_type }}
 linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
 name: cuda12.1-py3.10-gcc9-sm80
 uses: ./.github/workflows/_linux-build.yml
- needs: get-label-type
 with:
- runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
 build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
 docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
 cuda-arch-list: '8.0'

Some files were not shown because too many files have changed in this diff