Compare commits


72 Commits

Author SHA1 Message Date
c469d14a14 [NJT+SDPA] Fix flash_attention output when batch_size=1 and seq_len=1 (#133595)
* [NJT+SDPA] Fix flash_attention output when batch_size=1 and seq_len=1 (#130652)

Fixes issue #130196

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130652
Approved by: https://github.com/Skylion007, https://github.com/drisspg, https://github.com/jbschlosser

(cherry picked from commit 0e79e1f95841041ef689e8a94c8be1e92702b873)

* resolve conflict by using the old NT API

* fix lint

---------

Co-authored-by: yuqingj <yuqingj@meta.com>
2024-08-22 13:42:22 -04:00
bd92fa2cf2 Update conda-env-iOS.txt (#134239)
Update conda-env-iOS.txt (#134068)

Follow-up after https://github.com/pytorch/pytorch/pull/133814. To fix periodic build failures, update `typing-extensions` to 4.11.0, as 4.10 is missing in conda.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/134068
Approved by: https://github.com/wdvr, https://github.com/atalman, https://github.com/Skylion007

(cherry picked from commit 18aaceb7be552ccdcb65f485d5f82be9af8e2898)

Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
2024-08-22 10:39:30 -07:00
eba4d08497 [MPS] Gather sliced inputs to batch norm (#134121)
[MPS] Gather sliced inputs to batch norm (#133610)

This PR removes the `executeGatherOp` flag from batch norm in favor of relying on the logic in 4aa66f68a8/aten/src/ATen/native/mps/OperationUtils.mm (L372) to decide if gathering is necessary.

It's not the most efficient way to solve this issue, but it ensures correctness for sliced inputs.
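
For a quick correctness sanity check, here is a hedged sketch (assuming an MPS-enabled build; the tolerance is illustrative) comparing the MPS result on a sliced input against the CPU reference:

```python
import torch
import torch.nn as nn

# Compare MPS batch norm on a non-contiguous slice against the CPU result.
bn = nn.BatchNorm2d(100, affine=False)
x = torch.randn(100, 100, 35, 45)
ref = bn(x[5:])                             # CPU reference
out = bn.to("mps")(x.to("mps")[5:]).cpu()   # sliced (gathered) MPS input
print(torch.allclose(ref, out, atol=1e-4))  # True with the fix
```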

### Performance impact

#### With fix

```
python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)"
100 loops, best of 5: 282 usec per loop

python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])"
100 loops, best of 5: 448 usec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)"
1000 loops, best of 5: 705 usec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])"
1000 loops, best of 5: 1.11 msec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x)"
1000 loops, best of 5: 7.16 msec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x[5:])"
1000 loops, best of 5: 11.7 msec per loop
```

#### Without fix

```
python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)"
100 loops, best of 5: 284 usec per loop

python -m timeit -n 100 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])"
100 loops, best of 5: 265 usec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x)"
1000 loops, best of 5: 715 usec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(100, 100, 35, 45).to('mps')" "bn(x[5:])"
1000 loops, best of 5: 675 usec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x)"
1000 loops, best of 5: 7.19 msec per loop

python -m timeit -n 1000 -s "import torch; import torch.nn as nn; bn = nn.BatchNorm2d(100, affine=False, device='mps');x = torch.randn(1000, 100, 35, 45).to('mps')" "bn(x[5:])"
1000 loops, best of 5: 7.13 msec per loop
```

Please feel free to push back or request changes.

Fixes #133520
Pull Request resolved: https://github.com/pytorch/pytorch/pull/133610
Approved by: https://github.com/malfet

(cherry picked from commit 43f78bf37a0432af4c048e1f1363838f26ef3295)

Co-authored-by: Roy Hvaara <roy@lightyear.no>
2024-08-22 13:39:10 -04:00
2213c07dcd [CpuInductor] Enable NEON ISA detection on Linux ARM (#134165)
* [CpuInductor] Enable NEON ISA detection on Linux ARM (#129075)

Also, clean up the code a bit to use `x in [y, z]` instead of `x == y or x == z`

And do not redefine `at_align`, but instead use `alignas(64)` as was suggested in https://github.com/pytorch/pytorch/pull/128686/files#r1639365978

Test plan: `python3 -c "import torch._inductor.codecache as cc; isa = cc.valid_vec_isa_list()[0];print(str(isa), bool(isa))"`

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129075
Approved by: https://github.com/jansel

* Fix merge mistakes

---------

Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
2024-08-21 16:30:01 -07:00
4ebe5b7cf4 Avoid autocast deprecation warning in DataParallel (#130660) (#134057)
Fixes #130659

Co-authored-by: Yu, Guangye <106960996+guangyey@users.noreply.github.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/130660
Approved by: https://github.com/guangyey, https://github.com/fegin, https://github.com/albanD

Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
2024-08-21 12:43:28 -04:00
346e0f605f [Inductor] short-term fix for needs_fixed_stride_order silent incorrectness (#133452) (#133888)
This is a low-risk short-term fix for
https://github.com/pytorch/pytorch/issues/128084, for the purposes of
2.4.1. The actual fix for that issue is more risky and we'll target 2.5.

needs_fixed_stride_order is silently incorrect with args that are
mutable because it creates clones of those args, writes into them, and
doesn't update the original args.

This PR makes it so that needs_fixed_stride_order doesn't apply to
inputs that are being mutated.

This PR doesn't completely fix the problem, but it makes it less
incorrect: most of the time the input already has the correct strides
but inductor fails to recognize it, and in those cases writing directly
to the input is fine.
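
As a hedged illustration of the failure mode (not the PR's test; `mylib::fill_twice` and its body are made up, using the `torch.library.custom_op` API):

```python
import torch
from torch.library import custom_op

# A custom op that mutates its first argument. If the compiler clones `out`
# to satisfy a fixed-stride requirement and writes into the clone, the
# caller's original tensor is silently left untouched.
@custom_op("mylib::fill_twice", mutates_args={"out"})
def fill_twice(out: torch.Tensor, x: torch.Tensor) -> None:
    out.copy_(2 * x)

@torch.compile
def f(out, x):
    fill_twice(out, x)
    return out

x = torch.randn(4)
out = torch.empty(4)
f(out, x)
print(torch.allclose(out, 2 * x))  # True once the mutation reaches `out`
```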

Test Plan:
- new test

Pull Request resolved: https://github.com/pytorch/pytorch/pull/133452
Approved by: https://github.com/eellison
2024-08-21 11:16:20 -04:00
362a6ca99a Add xpu_cmake_macros.h to xpu build (#133649)
Add xpu_cmake_macros.h to xpu build (#132847)

# Motivation

fix https://github.com/pytorch/pytorch/issues/132971

Pull Request resolved: https://github.com/pytorch/pytorch/pull/132847
Approved by: https://github.com/EikanWang

(cherry picked from commit 9c5e0d47fe3373f3c468e59877f71c4999cca227)

Co-authored-by: Yu, Guangye <guangye.yu@intel.com>
2024-08-21 11:14:13 -04:00
dab239be1f [cpu][flash attention] fix nan issue (#133598)
[cpu][flash attention] fix nan issue (#130014)

Fixes #127055.

NaNs are generated in flash attention because the computation of `std::exp((-inf) - (-inf))` and `+/-inf * 0` in lazy softmax. We fix the issue by avoiding the related calculation.
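
For reference, the two indeterminate forms named above, reproduced in plain Python:

```python
import math

neg_inf = float("-inf")
print(neg_inf - neg_inf)            # nan: (-inf) - (-inf) is indeterminate
print(math.exp(neg_inf - neg_inf))  # nan: exp propagates the NaN
print(float("inf") * 0.0)           # nan: +/-inf * 0 is indeterminate
```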

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130014
Approved by: https://github.com/jgong5, https://github.com/drisspg

(cherry picked from commit 868d9a4f129a0a146c06f730592bead27058efd8)

Co-authored-by: Valentine233 <xuan.liao@intel.com>
2024-08-21 08:54:29 -04:00
30faa177c4 Fix warning when pickle.load torch.Storage (#133597)
Fix warning when pickle.load torch.Storage (#130246)

Fixes https://github.com/pytorch/pytorch/issues/130242

Since `torch.save` does not use pickle for storages, the `torch.load` in `_load_from_bytes` should not ever be called when `torch.load`-ing a checkpoint. Setting weights_only=False explicitly in `_load_from_bytes` to avoid the weights_only warning when using the pickle module
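
A hedged repro of the warning path described above (the tensor value is arbitrary):

```python
import pickle
import torch

# Pickling a tensor with the plain pickle module routes its storage through
# _load_from_bytes on load, which calls torch.load internally and, before
# this fix, triggered the weights_only FutureWarning.
buf = pickle.dumps(torch.randn(2))
t = pickle.loads(buf)  # warned before the fix; silent after
print(t)
```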

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130246
Approved by: https://github.com/albanD

(cherry picked from commit dfd1d1971ea1265c597124b7e75fe5c8dd5a45b4)

Co-authored-by: Mikayla Gawarecki <mikaylagawarecki@gmail.com>
2024-08-21 08:52:42 -04:00
18736d2b55 [inductor] parallel compile: Create new pipes for subproc communicati… (#134042)
* [inductor] parallel compile: Create new pipes for subproc communication (#131194)

Summary: Rather than using stdin/stdout for IPC, we can create new pipes and pass the descriptors to the subproc via the cmd line. https://github.com/pytorch/pytorch/issues/131070 reports an issue where the combination of deepspeed and onnxruntime-training causes _something_ in the subproc to write to stdout and corrupt the IPC. The current implementation was already brittle; we can just create new pipes specifically for the IPC.
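
A minimal sketch of the approach (not the actual inductor code; names are illustrative):

```python
import os
import subprocess
import sys

# Create a dedicated pipe and hand the raw write descriptor to the child on
# its command line, so stray writes to the child's stdout cannot corrupt
# the IPC channel.
read_fd, write_fd = os.pipe()
child_src = (
    "import os, sys; fd = int(sys.argv[1]); "
    "print('stray stdout noise is now harmless'); "
    "os.write(fd, b'result')"
)
proc = subprocess.Popen(
    [sys.executable, "-c", child_src, str(write_fd)],
    pass_fds=(write_fd,),  # keep the write end open (and inheritable) in the child
)
os.close(write_fd)  # the parent only reads
print(os.read(read_fd, 1024))  # b'result'
proc.wait()
```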

Test Plan: I was able to repro the MemoryError in https://github.com/pytorch/pytorch/issues/131070 by installing deepspeed and onnxruntime-training. Verified this PR fixes.

Differential Revision: [D59968362](https://our.internmc.facebook.com/intern/diff/D59968362)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/131194
Approved by: https://github.com/malfet, https://github.com/eellison, https://github.com/atalman

* add_catch_statement

* log_fix

---------

Co-authored-by: Sam Larsen <slarsen@meta.com>
2024-08-21 07:53:15 -04:00
1002815f17 Pass torch.load(weights_only=) internally to avoid FutureWarning (#133594)
Pass `torch.load(weights_only=)` internally to avoid FutureWarning (#130663)

Fixes #130658

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130663
Approved by: https://github.com/malfet, https://github.com/LucasLLC

(cherry picked from commit ad314a2f055dbd28bba07cdb585769e6b7b6654e)

Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
2024-08-20 15:35:04 -04:00
d3d93a897b Replace [[unlikely]] with unlikely(x) (#133583)
Replace [[unlikely]] with unlikely(x) (#130816)

Do not use `[[unlikely]]` as it is a C++20 language feature, see https://en.cppreference.com/w/cpp/language/attributes/likely

Fixes https://github.com/pytorch/pytorch/issues/130815

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130816
Approved by: https://github.com/jgong5, https://github.com/jansel, https://github.com/malfet

(cherry picked from commit 32f9a809c780d0dda2d5de8ff6348721e9f644e2)

Co-authored-by: Danielmic <30855238+Danielmic@users.noreply.github.com>
2024-08-20 15:26:48 -04:00
24e04f3bfd Remove QNNPACK reference from setup.py (#133526)
Remove QNNPACK reference from setup.py (#133177)

QNNPACK has been removed from third party
Pull Request resolved: https://github.com/pytorch/pytorch/pull/133177
Approved by: https://github.com/albanD

(cherry picked from commit e76f0e0646eb5de838ba1410bcda6925d1ed58c3)

Co-authored-by: cyy <cyyever@outlook.com>
2024-08-15 14:27:54 -07:00
7e0ef343b0 [ROCm] Check supported archs before setting preferred blas backend to hipblasLT (#133359)
[ROCm] Check supported archs before setting preferred blas backend to hipblasLT (#128753)

This PR is needed to resolve usability issues with PyTorch ROCm nightly wheels on non-gfx90a/gf94x architectures as a result of https://github.com/pytorch/pytorch/pull/127944.

Addresses https://github.com/pytorch/pytorch/issues/119081#issuecomment-2166504992

### With this PR's changes, I get the following on a gfx908 (unsupported by hipblasLT) architecture:
_Using setter function:_
```
>>> torch.backends.cuda.preferred_blas_library(backend="cublaslt")
[W617 19:58:58.286088851 Context.cpp:280] Warning: torch.backends.cuda.preferred_blas_library is an experimental feature. If you see any error or unexpected behavior when this flag is set please file an issue on GitHub. (function operator())
[W617 19:59:02.125161985 Context.cpp:291] Warning: Attempting to use hipBLASLt on an unsupported architecture! Overriding blas backend to hipblas (function operator())
<_BlasBackend.Cublas: 0>
```

_Using `TORCH_BLAS_PREFER_HIPBLASLT` env var:_
```
root@9d47bf40d4d4:/tmp/pytorch# TORCH_BLAS_PREFER_CUBLASLT=1 python
>>> import torch
>>> torch.backends.cuda.preferred_blas_library()
[W619 06:14:11.627715807 Context.cpp:274] Warning: Attempting to use hipBLASLt on an unsupported architecture! Overriding blas backend to hipblas (function operator())
<_BlasBackend.Cublas: 0>
```

### and the following on a gfx90a (supported by hipblasLT) architecture:
_Using setter function:_
```
>>> import torch
>>> torch.backends.cuda.preferred_blas_library()
<_BlasBackend.Cublaslt: 1>
>>> torch.backends.cuda.preferred_blas_library(backend="cublas")
<_BlasBackend.Cublas: 0>
>>> torch.backends.cuda.preferred_blas_library(backend="cublaslt")
[W620 18:38:29.404265518 Context.cpp:293] Warning: torch.backends.cuda.preferred_blas_library is an experimental feature. If you see any error or unexpected behavior when this flag is set please file an issue on GitHub. (function operator())
<_BlasBackend.Cublaslt: 1>
```

_Using `TORCH_BLAS_PREFER_HIPBLASLT` env var:_
```
root@9d47bf40d4d4:/tmp/pytorch# TORCH_BLAS_PREFER_HIPBLASLT=1 python
>>> import torch
>>> torch.backends.cuda.preferred_blas_library()
<_BlasBackend.Cublaslt: 1>
```
(Same result for _Using `TORCH_BLAS_PREFER_CUBLASLT` env var:_)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128753
Approved by: https://github.com/malfet

(cherry picked from commit e16276b9bf9e7c5cfcfd8242d336b26eb7dd182f)

Co-authored-by: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com>
2024-08-15 15:33:34 -04:00
58ab993dcc Fix recent build error on ppc64le (#133416)
Fix recent build error on ppc64le  (#129736)

This PR will fix the recent build issue observed on ppc64le.
Fixes #128130

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129736
Approved by: https://github.com/albanD, https://github.com/malfet

(cherry picked from commit 69cbf0552932c9159fcf8557ea595e00b9f3f9d3)

Co-authored-by: pratiklp00 <pratikp@linux.ibm.com>
2024-08-14 10:21:21 -07:00
6ba64be950 fix for launching kernel invalid config error when calling embedding … (#133346)
fix for launching kernel invalid config error when calling embedding … (#130994)

…with large index

Fixes #130806
When an output size of 2147483648 (=131072*16384) is expected in the above issue, it threw the following error:
RuntimeError: HIP error: invalid configuration argument

What happened was that the second parameter passed to hipLaunchKernel was a crazy {2147483648,1,1}.
Found two issues in Indexing.cu:

1: `ptrdiff_t` was used, but it is a signed int; outTotalSize >= 2147483648 can cause overflow when doing [this](39493aa934/aten/src/ATen/native/cuda/Indexing.cu (L1367)):
2: On ROCm, std::min -> ::min did not work as expected when outTotalSize >= 2147483648

As a result, 2147483648 was sent to hipLaunchKernel, which the GPU does not support since this number specifies the number of threads per block. The original code intended to set 128 threads per block; though this is debatable, as the perf would not be good for the latest powerful GPUs (a TODO item to update for perf, maybe?), at least it would not cause the `invalid configuration argument` error.
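
A hedged sketch of the failing configuration (shapes chosen to hit 131072 * 16384 = 2147483648 output elements, matching the issue; needs roughly 8 GB free for the fp32 output alone):

```python
import torch

weight = torch.randn(32, 16384, device="cuda")        # small table, wide rows
idx = torch.randint(0, 32, (131072,), device="cuda")
out = torch.nn.functional.embedding(idx, weight)      # 131072 x 16384 output
print(out.dim(), out.numel())                         # 2 2147483648
```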

[Test]
Run the same code snippet in the [issue](https://github.com/pytorch/pytorch/issues/130806), and print the output, its dim and numel(), which looks like below now:
```
output=tensor([[ 0.4044, -0.0244, -0.6865,  ..., -0.7800,  0.1175,  1.6726],
        [-1.0866, -0.1609,  0.3538,  ...,  1.9105,  0.7882,  1.1583],
        [-2.2079,  0.3736,  0.3610,  ..., -0.2658, -0.0459,  1.3077],
        ...,
        [ 0.8753, -0.7482, -0.1978,  ...,  0.9016,  1.1501, -0.5178],
        [-1.5845, -0.6277,  1.4520,  ...,  0.5733, -2.1198, -0.0915],
        [-0.6310, -1.0239, -0.1910,  ...,  0.4309,  0.1630,  0.3239]],
       device='cuda:0'), dim=2, numel=2147483648
```

Added a large tensor unit test too.
```
/pytorch# pytest test/nn/test_embedding.py -k test_large_tensors
================================================================================== test session starts ===================================================================================
platform linux -- Python 3.9.19, pytest-7.3.2, pluggy-1.4.0
rootdir: /dockerx/development/pytorch
configfile: pytest.ini
plugins: flakefinder-1.1.0, rerunfailures-14.0, xdist-3.3.1, xdoctest-1.1.0, cpp-2.3.0, hypothesis-5.35.1
collected 288 items / 287 deselected / 1 selected
Running 1 items in this shard

test/nn/test_embedding.py .                                                                                                                                                        [100%]

=========================================================================== 1 passed, 287 deselected in 3.16s ============================================================================
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/130994
Approved by: https://github.com/jeffdaily, https://github.com/xw285cornell

(cherry picked from commit 637ab85e7ff0ae6119cd39c4a554e21da901b45e)

Co-authored-by: hongxyan <hongxyan@amd.com>
2024-08-14 10:09:31 -04:00
d61995868c [Doc] update guide install mkl-static from conda to pip (#133328)
[Doc] update guide install mkl-static from conda to pip (#130026)

<img width="619" alt="image" src="https://github.com/pytorch/pytorch/assets/8433590/4ac3ca68-57dc-42c7-ac7a-876dc377ebcf">

The conda intel channel is not available now.
Use `pip` install instead of `conda`.

`Windows` and `Linux` are available:
Binary list: https://pypi.org/project/mkl-static/#files

`MacOS` is available for an old version:
https://pypi.org/project/mkl-static/2021.3.0/#files

TODO:
1. cherry-pick to `release/2.4` branch, @atalman .
2. fix it also in `release/2.3` branch: https://github.com/pytorch/pytorch/pull/131853

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130026
Approved by: https://github.com/jgong5, https://github.com/atalman

(cherry picked from commit 484852c02b94e894bde23b5c07d7ab2cb4d98b9b)

Co-authored-by: Xu Han <xu.han@outlook.com>
2024-08-14 10:08:02 -04:00
26735e7364 [MPS][TYPE_PROMOTION] Fix Clamp (#133260)
[MPS][TYPE_PROMOTION] Fix Clamp (#130226)

Summary:
1. Fixed #130201 by adding type promotion.
2. Added proper tests.
3. Found torch's type promotion is different from numpy as follows:

```python
import torch
import numpy as np
np.clip(np.array([1], dtype=np.float32), np.array([1], dtype=np.int32), None).dtype  # dtype('float64')
torch.clamp(torch.tensor([1], dtype=torch.float32), torch.tensor([1], dtype=torch.int32)).dtype  # torch.float32
```

~Not sure of the proper way to handle it; it causes numpy ref tests to fail.~
Reason here, so I think I'm gonna xfail it:
3c1cf03fde/test/test_ops.py (L260-L264)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130226
Approved by: https://github.com/malfet

(cherry picked from commit 99967e11199a624a852864354acf7ebccb9bb677)

Co-authored-by: Li-Huai (Allan) Lin <qqaatw@gmail.com>
2024-08-13 10:03:02 -07:00
f6fb80b0f9 Fix mkl-static issue for Windows. (#132401)
Fix mkl-static issue for Windows. (#130697)

Background:
We found a pytorch Windows release/2.4 performance regression: https://github.com/pytorch/pytorch/issues/130619

After some debugging work, I found that the pytorch Windows static mkl build options were wrong:
<img width="1049" alt="image" src="https://github.com/user-attachments/assets/38692142-bfca-4c98-8092-6e105c82bb13">
1. Thread lib is wrong.
2. Miss `openmp` lib and config.
> Debug history: https://github.com/pytorch/pytorch/issues/130619#issuecomment-2226782504 and https://github.com/pytorch/pytorch/issues/130619#issuecomment-2226418611

This PR will fix `mkl-static` build options issue.
<img width="863" alt="image" src="https://github.com/user-attachments/assets/834f6cee-7e6d-4d74-b2bc-8a270f05e429">

Reference:
<img width="482" alt="image" src="https://github.com/user-attachments/assets/8184dadb-f230-4062-a49f-51df1d7285f5">

https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-link-line-advisor.html#gs.c6izlg

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130697
Approved by: https://github.com/jgong5, https://github.com/atalman

(cherry picked from commit f1456c74a09ec33d7739814e13e5745408bf8b6c)

Co-authored-by: Xu Han <xu.han@outlook.com>
2024-08-01 09:55:45 -04:00
14ab5b5059 Add single Python 3.10, single Cuda 12.1 build with dependencies included (#132094)
* Add single Python 3.10, single Cuda 12.1 build with dependencies included (#130349)

Build a large wheel for Python 3.10, CUDA 12.1 that will be used in Colab. Build name: `manywheel-py3_11-cuda12_1-full-build`

We still have all code to support the full build in builder repo, here:
https://github.com/pytorch/builder/blob/main/manywheel/build_cuda.sh#L151

Test:
```
>>> import sys
>>> import torch
>>> print(torch.__version__)
2.3.0+cu121
>>> sys.version_info
sys.version_info(major=3, minor=10, micro=12, releaselevel='final', serial=0)
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/130349
Approved by: https://github.com/malfet

(cherry picked from commit a1590e16dff4a24753235a4976c88164c66efc3a)

* fix cherry-pick

* lint

---------

Co-authored-by: atalman <atalman@fb.com>
2024-07-29 19:50:22 -04:00
d990dada86 [CMAKE] Look for Development.Module instead of Development (#129729)
Based on the [cmake issue](https://gitlab.kitware.com/cmake/cmake/-/issues/23716) and the [manylinux issue](https://github.com/pypa/manylinux/issues/1347), when building a python module the build should find the `Development.Module` component, not `Development`, which includes both `Development.Module` and `Development.Embed` and therefore expects the shared python library. After this PR and before #124613, pytorch could be built with a static libpython (e.g. in manylinux).

Cherry-pick of 953c6476bd75e3fa1d558204bb30ff5fc90ce4f1 into release/2.4
2024-07-09 11:17:43 -07:00
e4ee3be406 [Release only] use triton 3.0.x from pypi (#130336) 2024-07-09 11:06:52 -04:00
9afe4ec096 Update torchbench model expected accuracy values after pinning numpy (#129986)
* Update torchbench model expected accuracy values after pinning numpy (#129213)

After pinning numpy on torchbench, we need to move torchbench inductor benchmark jobs out of unstable state asap, so that more failures don't sneak it.  I'm updating the expected values here to make trunk green.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129213
Approved by: https://github.com/xuzhao9, https://github.com/malfet, https://github.com/desertfire

(cherry picked from commit b72ef9df0d09b9c020af8ca7930da5ca4728b7e7)

* No change to yolov3

---------

Co-authored-by: Huy Do <huydhn@gmail.com>
2024-07-03 22:44:58 -04:00
499621e7bb [CherryPick][FSDP2+TP] Disable 2D state_dict (#129519) (#129923)
[FSDP2+TP] Disable 2D state_dict (#129519)

Gonna fill in the RFC but just want to run CI to see if anything else breaks.

Test:
```
python test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_raise_not_implemented_state_dict_if_2d
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129519
Approved by: https://github.com/awgu

(cherry picked from commit 83e6ec2ccdf1333b946baf8b749b345addf641e8)
2024-07-02 10:51:39 -04:00
e5bda62849 [CherryPick][DCP] Fix Optimizer Learning Rate not being loaded correctly (#129398) (#129683)
[DCP] Fix Optimizer Learning Rate not being loaded correctly (#129398)

Fixes #129079

Currently, tensor objects are loaded correctly in-place, but non-tensor objects such as the learning rate are not loaded correctly after f518cf811d, which is a regression introduced in 2.3.

This PR replaces tree_map_only and the manual replacement of the state dict items with _tree_map_only and fixes the regression of non-tensor loading.
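
For context, a small illustration of the tree_map_only semantics involved (values are made up): it applies the function only to leaves of the given type and returns a new tree, so non-tensor leaves like the learning rate pass through and must be handled explicitly when loading in-place.

```python
import torch
from torch.utils._pytree import tree_map_only

state = {"lr": 0.1, "exp_avg": torch.zeros(2)}
# Only tensor leaves are transformed; "lr" passes through untouched.
new_state = tree_map_only(torch.Tensor, lambda t: t + 1, state)
print(new_state["lr"], new_state["exp_avg"])  # 0.1 tensor([1., 1.])
```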

Test:
```
python3 test/distributed/checkpoint/e2e/test_e2e_save_and_load.py -k test_init_state_dict
python3 test/distributed/checkpoint/test_tp_checkpoint.py -k test_tp_checkpoint_load_on_meta_device
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129398
Approved by: https://github.com/fegin

(cherry picked from commit 8b8e2fcdda4eb2d15a57496b7b5eddd27966854f)
2024-07-02 10:41:41 -04:00
705e3ae420 Improve error message for weights_only load (#129783)
* Improve error message for weights_only load (#129705)

As @vmoens pointed out, the current error message does not make the "either/or" between setting `weights_only=False` and using `add_safe_globals` clear enough, and should print the code for the user to call `add_safe_globals`

New formatting looks like such

In the case that `add_safe_globals` can be used

```python
>>> import torch
>>> from torch.testing._internal.two_tensor import TwoTensor
>>> torch.save(TwoTensor(torch.randn(2), torch.randn(2)), "two_tensor.pt")
>>> torch.load("two_tensor.pt", weights_only=True)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/data/users/mg1998/pytorch/torch/serialization.py", line 1225, in load
    raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
_pickle.UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options
        (1) Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
        (2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
        WeightsUnpickler error: Unsupported global: GLOBAL torch.testing._internal.two_tensor.TwoTensor was not an allowed global by default. Please use `torch.serialization.add_safe_globals([TwoTensor])` to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
```

For other issues (unsupported bytecode)
```python
>>> import torch
>>> t = torch.randn(2, 3)
>>> torch.save(t, "protocol_5.pt", pickle_protocol=5)
>>> torch.load("protocol_5.pt", weights_only=True)
/data/users/mg1998/pytorch/torch/_weights_only_unpickler.py:359: UserWarning: Detected pickle protocol 5 in the checkpoint, which was not the default pickle protocol used by `torch.load` (2). The weights_only Unpickler might not support all instructions implemented by this protocol, please file an issue for adding support if you encounter this.
  warnings.warn(
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/data/users/mg1998/pytorch/torch/serialization.py", line 1225, in load
    raise pickle.UnpicklingError(_get_wo_message(str(e))) from None
_pickle.UnpicklingError: Weights only load failed. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
 Please file an issue with the following so that we can make `weights_only=True` compatible with your use case: WeightsUnpickler error: Unsupported operand 149

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.
```

Old formatting would have been like:
```python
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/data/users/mg1998/pytorch/torch/serialization.py", line 1203, in load
    raise pickle.UnpicklingError(UNSAFE_MESSAGE + str(e)) from None
_pickle.UnpicklingError: Weights only load failed. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you get the file from a trusted source. Alternatively, to load with `weights_only` please check the recommended steps in the following error message. WeightsUnpickler error: Unsupported global: GLOBAL torch.testing._internal.two_tensor.TwoTensor was not an allowed global by default. Please use `torch.serialization.add_safe_globals` to allowlist this global if you trust this class/function.
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129705
Approved by: https://github.com/albanD, https://github.com/vmoens
ghstack dependencies: #129239, #129396, #129509

(cherry picked from commit 45f3e20527c0cc27a4d6c3b93f2fa529b80556bb)

* Fix pickle import when rebase onto release/2.4

* Update torch/serialization.py

fix bad rebase again

---------

Co-authored-by: Mikayla Gawarecki <mikaylagawarecki@gmail.com>
2024-06-29 13:01:36 -04:00
b26cde49b6 [Windows] remove mkl shared library dependency. (#129740)
[Windows] remove mkl shared library dependency. (#129493)

# Background
I have fixed the pytorch Windows missing mkl shared library dependency issue: https://github.com/pytorch/pytorch/issues/124009
The solution is to change the torch_cpu module to statically link the mkl library:
1. pytorch static link mkl PR: https://github.com/pytorch/pytorch/pull/124925
2. builder install mkl static library: https://github.com/pytorch/builder/pull/1790

Double-confirmed that the current build uses mkl static linking: https://github.com/pytorch/pytorch/issues/124009#issuecomment-2160941802

# Goal
Remove the setup.py `install_requires` that installs the mkl shared libraries on pytorch Windows. It is not required now, because we have statically linked mkl.
This reduces pytorch install network traffic and avoids installing a useless mkl shared library package.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129493
Approved by: https://github.com/malfet

(cherry picked from commit 424068d0d22908294f2e0705d7227c37244b9319)

Co-authored-by: Xu Han <xu.han@outlook.com>
2024-06-28 14:59:08 -04:00
12ad767daf [distributed] NCCL result code update (#129704)
[distributed] NCCL result code update (#128777)

The nccl result codes are outdated. This PR fixes #128756.

Fixes #128756

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128777
Approved by: https://github.com/Skylion007

(cherry picked from commit c027c8935b25cdb99fce5595fa1a980df8cdb4ab)

Co-authored-by: Myungjin Lee <myungjle@cisco.com>
2024-06-28 08:25:26 -04:00
1164d3cb9c Add threadfence to 2-stage reduction for correct writes visibility (#129701)
Add threadfence to 2-stage reduction for correct writes visibility (#128455)

The final block accumulating the 2-stage reduction result has to complete the acquire pattern to make sure the writes of all other blocks are visible to it, see https://docs.nvidia.com/cuda/parallel-thread-execution/index.html?highlight=atom#release-and-acquire-patterns
Pull Request resolved: https://github.com/pytorch/pytorch/pull/128455
Approved by: https://github.com/eqy, https://github.com/ezyang

(cherry picked from commit 77a0ca66e4eb6919ed14a9491fa7579d06a29f3c)

Co-authored-by: Natalia Gimelshein <ngimel@meta.com>
2024-06-28 08:23:40 -04:00
9533637daa Inductor to fail gracefully on Voltas for bf16 tensors (#129699)
Inductor to fail gracefully on Voltas for bf16 tensors (#129288)

Volta (sm_7x) does not have HW support for the bfloat16 datatype; it is emulated in software, so PyTorch eager can use bfloat16 tensors, but Triton cannot. So if a graph with either CUDA bf16 input or output tensors is used, warnings are raised and the frame is skipped.

Add an optional parameter `including_emulation` to the `torch.cuda.is_bf16_supported` method and call it from `torch._inductor.compile_fx._check_triton_bf16_support`.
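
Usage sketch of the new parameter (assuming a CUDA build):

```python
import torch

if torch.cuda.is_available():
    # Default (including_emulation=True): Volta reports bf16 as supported
    # via software emulation, which is enough for eager mode.
    print(torch.cuda.is_bf16_supported())
    # Hardware-only answer, which is what the Triton check needs.
    print(torch.cuda.is_bf16_supported(including_emulation=False))
```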

Test plan: Modify `is_bf16_supported` to return False and see that the warning is generated

Fixes https://github.com/pytorch/pytorch/issues/118122 and https://github.com/pytorch/pytorch/issues/118581

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129288
Approved by: https://github.com/eqy, https://github.com/jansel

(cherry picked from commit 14dc08ddc7dc3d8d2a66d15e4df0eec626a17fcd)

Co-authored-by: Nikita Shulga <nshulga@meta.com>
2024-06-28 08:22:31 -04:00
fadd3cc4ab [MacOS] Improve libomp packaging (#129697)
[MacOS] Improve libomp packaging (#129473)

Instead of replacing `@rpath/libomp.dylib` with `@loader_path/libomp.dylib`, keep it in place and add `@loader_path` as a new rpath

This should prevent double-loading of the OpenMP runtime, because in the case of `@rpath` the loader is allowed to reuse other libraries, but the `@loader_path` directive forces it to load the library from the location relative to the executable

Test plan:
- Prepare the environment
```shell
conda create -n py310-cf python=3.10 numpy pip -c conda-forge
conda activate py310-cf
pip install torch --index-url https://download.pytorch.org/whl/test/cpu
```
- Verify that OpenMP is loaded twice and then crashes
```shell
KMP_VERSION=true python -c "import numpy as np; import torch; print(torch.__version__, torch.backends.openmp.is_available()); print(torch.rand(300, 300).abs().max())"
```
output:
```
LLVM OMP version: 5.0.20140926
LLVM OMP library type: performance
LLVM OMP link type: dynamic
LLVM OMP build time: no_timestamp
LLVM OMP build compiler: Clang 16.0
LLVM OMP alternative compiler support: yes
LLVM OMP API version: 5.0 (201611)
LLVM OMP dynamic error checking: no
LLVM OMP thread affinity support: no
LLVM OMP version: 5.0.20140926
LLVM OMP library type: performance
LLVM OMP link type: dynamic
LLVM OMP build time: no_timestamp
LLVM OMP build compiler: Clang 12.0
LLVM OMP alternative compiler support: yes
LLVM OMP API version: 5.0 (201611)
LLVM OMP dynamic error checking: no
LLVM OMP thread affinity support: no
2.4.0 True
zsh: segmentation fault  KMP_VERSION=true python -c
```
- Install artifact from this PR and make sure it passes the same test
```shell
python -mpip install ~/Downloads/torch-2.5.0.dev20240625-cp310-none-macosx_11_0_arm64.whl
KMP_VERSION=true python -c "import numpy as np; import torch; print(torch.__version__, torch.backends.openmp.is_available()); print(torch.rand(300, 300).abs().max())"
```
output
```
LLVM OMP version: 5.0.20140926
LLVM OMP library type: performance
LLVM OMP link type: dynamic
LLVM OMP build time: no_timestamp
LLVM OMP build compiler: Clang 16.0
LLVM OMP alternative compiler support: yes
LLVM OMP API version: 5.0 (201611)
LLVM OMP dynamic error checking: no
LLVM OMP thread affinity support: no
2.5.0.dev20240625 True
tensor(1.0000)
```
- Make sure it still uses bundled OpenMP if none is available in the environment
```
conda uninstall numpy -c conda-forge
KMP_VERSION=true python -c "from ctypes import cdll, c_char_p, c_uint32; import torch; from ctypes import cdll, c_char_p, c_uint32; libdyld = cdll.LoadLibrary('libSystem.dylib'); libdyld._dyld_image_count.restype = c_uint32; libdyld._dyld_get_image_name.restype = c_char_p; libdyld._dyld_get_image_name.argtypes = [c_uint32]; print(torch.rand(300, 300).abs().max()); libs = [libdyld._dyld_get_image_name(i).decode('ascii') for i in range(libdyld._dyld_image_count())]; print([l for l in libs if 'libomp.dylib' in l])"
```

Fixes https://github.com/pytorch/pytorch/issues/124497 and https://github.com/pytorch/pytorch/issues/126385
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129473
Approved by: https://github.com/atalman

(cherry picked from commit 816e8a3f2171aa2b7350bcd2106fe556de7db7a1)

Co-authored-by: Nikita Shulga <nikita.shulga@gmail.com>
2024-06-28 08:19:45 -04:00
80277a50bc Remove cuda check in the CUDAGraph destructor (#129696)
Remove cuda check in the CUDAGraph destructor (#127382)

Fixes #125804

Pull Request resolved: https://github.com/pytorch/pytorch/pull/127382
Approved by: https://github.com/eqy, https://github.com/eellison

(cherry picked from commit d3e8b8bf47206c27b6c5fdc021f7c2c3a8009521)

Co-authored-by: Frank Lin <eee4017@gmail.com>
2024-06-28 08:18:07 -04:00
d0831d65aa Tunableop hotfix2 unit tests, release/2.4 (#129607)
TunableOp hotfix, unit test follow-up

PR #129281 was landed to fix critical issues but did not contain unit
tests to exercise those issues.  This is a follow-up set of unit tests
that would exercise the problems seen previously.
2024-06-27 16:53:43 -04:00
ca8d4d1751 TunableOp hotfix (#129499)
TunableOp hotfix (#129281)

Fixes:
- PYTORCH_TUNABLEOP_NUMERICAL_CHECK=1 had a memory leak.
- The strided batched gemm size calculation for buffer rotation was incorrect resulting in a mem fault.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129281
Approved by: https://github.com/xw285cornell, https://github.com/eqy, https://github.com/mxz297

(cherry picked from commit e68ee2cadb76f84c3bda788fc3b9c8194e8d921e)

Co-authored-by: Jeff Daily <jeff.daily@amd.com>
2024-06-27 16:51:40 -04:00
3d7d7927ca Upload release tag source code to s3 (#129600)
Upload release tag source code to s3 (#128842)

Upload tarball containing source code to s3 for release tags

Can be found here https://us-east-1.console.aws.amazon.com/s3/buckets/pytorch?region=us-east-1&bucketType=general&prefix=source_code/test/&showversions=false

D58695048 for adding permissions to allow uploading to the s3 folder
Pull Request resolved: https://github.com/pytorch/pytorch/pull/128842
Approved by: https://github.com/atalman, https://github.com/malfet

(cherry picked from commit 795db8097558e4679784f403e278d38e30c6583d)

Co-authored-by: Catherine Lee <csl@fb.com>
2024-06-27 12:32:41 -04:00
5f7de217cb Add warning for weights_only (#129572)
ghstack-source-id: 1098e33fad7c0d38688912c2edb375d77976bbc7
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129239
2024-06-27 12:31:21 -04:00
072d9e8ac9 Add example for torch.serialization.add_safe_globals (#129573)
ghstack-source-id: e23d66f6ab317274e36519141474a6010db54e59
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129396
2024-06-27 12:30:13 -04:00
1f84579407 Cherry pick #129244 #129251 #129509 (#129574)
* Fix allowlisting of builtins for weights_only unpickler (#129244)

Since we use [`DEFAULT_PROTOCOL=2`](https://github.com/pytorch/pytorch/blob/main/torch/serialization.py#L62), some functions/classes that were renamed from python 2-->3 will be pickled with their python2 name. This PR ensures that when a mod `GLOBAL <python2_mod>.<python2_name>` is encountered, [following the strategy used by pickle](https://github.com/python/cpython/blob/main/Lib/pickle.py#L1590C13-L1593C63) it is properly mapped to `<python3_mod>.<python3_name>`.

This fix ensures that `add_safe_globals` works properly for such functions/classes (i.e. users will allowlist the python3 func and the weights_only unpickler will do the appropriate translation when checking whether a class was allowlisted).

An example is as follows:
`__builtin__` was named to `builtins`, see the [release notes for Python 3.0](https://docs.python.org/3/whatsnew/3.0.html)

> Renamed module `__builtin__` to [`builtins`](https://docs.python.org/3/library/builtins.html#module-builtins) (removing the underscores, adding an ‘s’). The __builtins__ variable found in most global namespaces is unchanged. To modify a builtin, you should use [builtins](https://docs.python.org/3/library/builtins.html#module-builtins), not `__builtins__`!

However, since we use [`DEFAULT_PROTOCOL=2`](https://github.com/pytorch/pytorch/blob/main/torch/serialization.py#L62), builtins will be pickled with their module string as `__builtin__`.

```python
>>> import pickle
>>> import pickletools
>>> print.__module__
'builtins'
>>> with open('print.pkl', 'wb') as f:
...     pickle.dump(print, f, protocol=2)  # 2 because this is the default protocol used by pytorch
>>> with open('print.pkl', 'rb') as f:
...     pickletools.dis(f)
    0: \x80 PROTO      2
    2: c    GLOBAL     '__builtin__ print'  # pickle saves the module string as __builtin__ !!! :(
   21: q    BINPUT     0
   23: .    STOP
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129244
Approved by: https://github.com/albanD

* Allow BUILD/NEWOBJ instruction for items added via torch.serialization.add_safe_globals (#129251)

Previously, allowlisting functions/classes via `torch.serialization.add_safe_globals(obj)` for the `weights_only` Unpickler had the following effect:

- For a [`GLOBAL`](https://github.com/python/cpython/blob/3.12/Lib/pickletools.py#L1926-L1939) instruction, `GLOBAL obj.__module__ obj.__name__` would be allowed and translated back to obj to be pushed back to the stack.
- For a [`REDUCE`](https://github.com/python/cpython/blob/3.12/Lib/pickletools.py#L1926-L1982) instruction where we expect the stack to contain `func` and `args`, `func` is allowed if it was added via `add_safe_globals`

However, it did not have an effect on `BUILD` and `NEWOBJ` instructions

Some classes may be rebuilt via [`NEWOBJ`](https://github.com/python/cpython/blob/3.12/Lib/pickletools.py#L2091-L2104) instruction, which indicates that their constructor should be used to rebuild the class.

Further, a [`BUILD`](https://github.com/python/cpython/blob/3.12/Lib/pickletools.py#L1984-L2007) instruction might be used if an object's `__reduce__`/`__reduce_ex__` returns a non-None value for `state`. Which indicates a `__setstate__` or `__dict__.update`.

**This PR makes sure that adding objects to the allowlist will also allow `NEWOBJ` and `BUILD` instructions for them.**

In particular, the update for `NEWOBJ` should unblock allowlisting of [`ScaledMMConfig`](d4ade877df/float8_experimental/float8_tensor.py (L26-L30)) in float8_experimental @drisspg

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129251
Approved by: https://github.com/albanD
ghstack dependencies: #129244
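
A hedged end-to-end sketch of what this enables (`Point` is a made-up class; a plain instance pickled at protocol 2 is rebuilt via NEWOBJ and its `__dict__` restored via BUILD):

```python
import torch
from torch.serialization import add_safe_globals

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

add_safe_globals([Point])              # allowlists GLOBAL, NEWOBJ and BUILD for Point
torch.save(Point(1, 2), "point.pt")
p = torch.load("point.pt", weights_only=True)
print(p.x, p.y)  # 1 2
```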

* Remove dependency on private _compat_pickle in CPython

ghstack-source-id: 7d6ee402dd0acbaa23c362475b96367f90447cc8
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129509
2024-06-27 12:29:21 -04:00
4d83bca8d8 Revert "Cherry pick #129244, #129251, #129239, 129396 into release/2.4" (#129571)
Revert "Cherry pick #129244, #129251, #129239, 129396 into release/2.4 (#129478)"

This reverts commit 22a4d46e2b4d5404e7df374e8ecb21026feb373e.
2024-06-26 10:56:24 -04:00
04339eec05 [Inductor][Intel GPU] Support reduction split. (#129120) (#129337)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129120
Approved by: https://github.com/EikanWang, https://github.com/jansel, https://github.com/desertfire
ghstack dependencies: #129124

(cherry picked from commit b0ae0db8156f186eaed69b0332d8698a8dfc799a)
2024-06-26 10:48:42 -04:00
22a4d46e2b Cherry pick #129244, #129251, #129239, 129396 into release/2.4 (#129478)
* Fix allowlisting of builtins for weights_only unpickler

ghstack-source-id: de329c75af5e022f1a4517cbfca2bd7f02baef4e
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129244

(cherry picked from commit cc99c015b7f74dcec5d77ea69c75aa3c3e152512)

* Allow NEWOBJ instruction for items added via torch.serialization.add_safe_globals

ghstack-source-id: 34a8fc32d256aa5fe68d4da8ea8f54e536a7ee31
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129251

(cherry picked from commit 50b888dc236b3bcc9dfe224ade311e66892fb64b)

* Add warning for weights_only

ghstack-source-id: ffa772cce121418ec96e13d1d93b6654f75f1c7d
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129239

(cherry picked from commit b3f9aa3f8f4c03b40fed53423d4a0a9340e3bd09)

* Add example for torch.serialization.add_safe_globals

ghstack-source-id: 6dc3275b4e58393813ab43b82fe5683e0d4559af
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129396

(cherry picked from commit ed8c36eda0f4dcf7b1d9c5eb2fb1cdccdf3fee6e)
2024-06-26 10:46:20 -04:00
560869918d Documentations for XPU functionality to PyTorch (#129266)
* Adding a note for Getting Started with PyTorch on Intel GPUs (#127872)

Adding a note for Getting Started with PyTorch on Intel GPUs

Pull Request resolved: https://github.com/pytorch/pytorch/pull/127872
Approved by: https://github.com/svekars

* update amp example to device-agnostic (#127278)

As support for Intel GPU has been upstreamed, this PR is to make the AMP example doc device-agnostic.

Co-authored-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/127278
Approved by: https://github.com/dvrogozh, https://github.com/EikanWang, https://github.com/svekars

* add xpu to torch.compile (#127279)

As support for Intel GPU has been upstreamed, this PR is to add the XPU-related contents to torch.compile doc.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/127279
Approved by: https://github.com/dvrogozh, https://github.com/svekars

* add xpu to torch.tensors (#127280)

As support for Intel GPU has been upstreamed, this PR is to add the XPU-related contents to torch.tensors doc.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/127280
Approved by: https://github.com/svekars

* add xpu for amp (#127276)

As support for Intel GPU has been upstreamed, this PR is to add the XPU-related contents to AMP doc.

Co-authored-by: Yu, Guangye <guangye.yu@intel.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/127276
Approved by: https://github.com/dvrogozh, https://github.com/albanD, https://github.com/malfet

---------

Co-authored-by: Zheng, Zhaoqiong <zhaoqiong.zheng@intel.com>
2024-06-26 10:33:09 -04:00
2bf37985b1 Support HSDP + Monolith Checkpointing (#128446) (#129254)
Fixes #128444. The rank 0 check should be in the same group as the broadcast.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128446
Approved by: https://github.com/fegin

(cherry picked from commit 153362fbc9e8642fb851a4de3b99e3871a2cc714)

Co-authored-by: Mihir Patel <mihir.v.patel7@gmail.com>
2024-06-26 10:31:15 -04:00
491e9e2d4a [DSD] Add unittest to verify HSDP1 + broadcast_from_rank0 (#128755) (#129255)
HSDP1 + broadcast_from_rank0 actually behaves differently from FSDP1 + broadcast_from_rank0, so we need a unit test to cover this use case.

This test relies on the fix from https://github.com/pytorch/pytorch/pull/128446.

Differential Revision: [D58621436](https://our.internmc.facebook.com/intern/diff/D58621436/)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128755
Approved by: https://github.com/Skylion007, https://github.com/wz337
ghstack dependencies: #128685

(cherry picked from commit fe8558b7aa4ce55d06893c48d5cb00b7a7eb7dae)
2024-06-26 10:30:44 -04:00
ec19059347 [DSD] Correctly handle shared parameters for optimizer state_dict (#1… (#129252)
[DSD] Correctly handle shared parameters for optimizer state_dict (#128685)

Fixes https://github.com/pytorch/pytorch/issues/128011

See the discussion in https://github.com/pytorch/pytorch/pull/128076

Current implementation of `set_optimizer_state_dict()` assumes that all the fqns returned by `_get_fqns()` must exist in the optimizer state_dict. This is not true if the model has shared parameters. In such a case, only one fqn of the shared parameters will appear in the optimizer state_dict. This PR addresses the issue.

Differential Revision: [D58573487](https://our.internmc.facebook.com/intern/diff/D58573487/)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128685
Approved by: https://github.com/LucasLLC

(cherry picked from commit 1a527915a64b8e5f60951715b09fa294b1a8844f)
2024-06-26 10:27:36 -04:00
04e98d3d0e [cpp_extension][inductor] Fix sleef windows depends. (#128770) (#128811)
# Issue:
While working on enabling inductor on PyTorch Windows, I found a sleef lib dependency issue.
<img width="1011" alt="image" src="https://github.com/pytorch/pytorch/assets/8433590/423bd854-3c5f-468f-9a64-a392d9b514e3">

# Analysis:
After we enabled SIMD on PyTorch Windows (https://github.com/pytorch/pytorch/pull/118980), the sleef functions are called from the VEC headers. This brings sleef into the dependencies.

Here is a difference between the Windows and Linux OS.
## Linux:
Linux exports its functions by default, so libtorch_cpu.so statically links sleef.a and then also exports sleef's functions.
<img width="647" alt="image" src="https://github.com/pytorch/pytorch/assets/8433590/00ac536c-33fc-4943-a435-25590508840d">

## Windows:
Windows does not export its functions by default, and has many limitations on exporting functions, reference: https://github.com/pytorch/pytorch/issues/80604
We can't package sleef functions via torch_cpu.dll like on Linux.

# Solution:
Actually, we already package the sleef static lib as a part of the release. We just need to help users link to sleef.lib, and it should be fine.
1. Add sleef to cpp_builder for inductor.
2. Add sleef to cpp_extension for C++ extension.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128770
Approved by: https://github.com/jgong5, https://github.com/jansel
2024-06-26 10:24:17 -04:00
699c056479 [ROCm] Include hsa headers for rocm-triton whl (#129235)
* Include hsa headers for rocm-triton whl

* Update triton pin to release/3.0.x tip

* Update .ci/docker/ci_commit_pins/triton-rocm.txt

---------

Co-authored-by: Andrey Talman <atalman@fb.com>
2024-06-21 13:19:23 -04:00
49d2eec960 [custom ops] Switch out references from old landing page to new landi… (#129237)
[custom ops] Switch out references from old landing page to new landing page (#129178)

Test Plan:
- existing tests

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129178
Approved by: https://github.com/albanD
ghstack dependencies: #129177
2024-06-21 09:18:50 -07:00
165e09874b [docs] Redirect custom ops landing page to the correct place (#129177) (#129236)
I'm moving it to pytorch/tutorials
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129177
Approved by: https://github.com/albanD
2024-06-21 09:18:13 -07:00
93c51dc84b Re-enable py3.12 nightly wheel builds and add triton dependency for ROCm (#129161)
* Re-enable py3.12 nightly wheel builds and add triton dependency for ROCm  (#128525)

The llnl-hatchet developers have published the py3.12 binaries on [PyPI](https://pypi.org/project/llnl-hatchet/#files). In fact, looking [here](https://download.pytorch.org/whl/nightly/llnl-hatchet), it seems we already have the py3.12 wheels mirrored. This should allow us to re-enable py3.12 binaries for ROCm.

This PR reverts commit 9d849d4312cd1e62d97b9e9d58979ec78d36c95f.

It also adds the pytorch-triton-rocm dependency for torch wheels on ROCm since pytorch-triton-rocm py3.12 wheels are available now

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128525
Approved by: https://github.com/malfet

(cherry picked from commit a6ac6447b55bcf910dee5f925c2c17673f162a36)

* Regenerate workflows

* regenerate-2

---------

Co-authored-by: Jithun Nair <jithun.nair@amd.com>
Co-authored-by: atalman <atalman@fb.com>
2024-06-21 10:28:06 -04:00
67a815abd2 [Release only] Temporary change to depend on pytorch-triton (#129232)
[Release only] Temporary change to depend on pytorch-triton
2024-06-21 09:58:07 -04:00
d2e4cc71f1 [inductor][ci] Fix torchbench dependency issue with numpy (#129074)
[inductor][ci] Fix torchbench dependency issue with numpy (#128968)

For some reason, pip will always upgrade the numpy version even when an older version has been installed.
We have to pin numpy to the old version to make this constraint explicit.

Torchbench commit: 23512dbebd

Second attempt to fix #128845

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128968
Approved by: https://github.com/eellison

(cherry picked from commit 118f9ceb7c9ec608a845b40c2142f1a1720b73c9)

Co-authored-by: Xu Zhao <xzhao9@meta.com>
2024-06-21 09:19:22 -04:00
0233f8df5b [ROCm] [Triton] - Include roctracer headers in triton whl (#129227)
Include roctracer header
2024-06-21 09:18:47 -04:00
434bf9559f [Release 2.4] Release only changes for triton 3.0.x build (#129143)
* [Release only changes] Release changes for triton 3.0

* fix
2024-06-20 11:22:10 -04:00
50e57d4f3f Revert "[Release 2.4] Release only changes - use pinned triton." (#129139)
Revert "[Release 2.4] Release only changes - use pinned triton. (#128388)"

This reverts commit 1cd41997e99ae1722be3fe88e1867af5f6779433.
2024-06-20 10:15:27 -04:00
edcc77dadb Remove leftover warning causing log spew (#128837)
Original PR: #128688

This warning was left in by mistake; it is uninformative (the user is doing nothing wrong) and causes log spew in trainings. See #120750 (comment)
2024-06-19 12:06:47 -04:00
0e0a9c5a5c [Inductor] Fix the High Order Op layout issue (#128275) (#128834)
Fix the issue: https://github.com/pytorch/pytorch/issues/127995

- In the current implementation of creating a `FallbackKernel`, the `device` of the `NoneLayout` is set to `None` when the `example_output` returned from `cls.process_kernel` is `None`. 921aa194c7/torch/_inductor/ir.py (L5632-L5649)
- If an `ExternalKernel` SchedulerNode has a `None` device, the previous buffer will not be flushed before codegen-ing this `ExternalKernel` SchedulerNode, which causes wrong generated code.
ef2b5ed500/torch/_inductor/scheduler.py (L2701-L2709)

**Test Plan**
```
python -u -m pytest -s -v test/higher_order_ops/test_with_effects.py -k test_compile_inductor_external_op_return_none
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128275
Approved by: https://github.com/eellison

Co-authored-by: leslie-fang-intel <leslie.fang@intel.com>
2024-06-19 12:05:13 -04:00
4af5000bff [Port][Quant][Inductor] Bug fix: mutation nodes not handled correctly for QLinearPointwiseBinaryPT2E (#128591)
[Quant][Inductor] Bug fix: mutation nodes not handled correctly for QLinearPointwiseBinaryPT2E (#127592)

Fixes #127402

- Revert some changes to `ir.MutationOutput` and inductor/test_flex_attention.py
- Add checks of mutation for QLinearPointwiseBinaryPT2E

Pull Request resolved: https://github.com/pytorch/pytorch/pull/127592
Approved by: https://github.com/leslie-fang-intel, https://github.com/Chillee
2024-06-19 11:46:53 -04:00
562cdc2084 [tp] refactor and fix PrepareModuleInput for DTensor inputs (#128431) (#128719)
As titled, this PR refactors the PrepareModuleInput style to have a common method prepare_input_arg, allowing both args and kwargs to reuse this logic

This also fixes https://github.com/pytorch/pytorch/issues/128365

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128431
Approved by: https://github.com/awgu

(cherry picked from commit 7775fee10f31ee683bd7beee9a5a9829c6574637)
2024-06-19 11:35:06 -04:00
b1d53f07b2 [inductor] fix compile time regression by caching get_gpu_type (#128363) (#128717)
We observed a significant compile time regression in torchtitan when turning on 2D parallel + torch.compile recently, so I decided to get a deeper understanding of why.

It turns out this is affecting **all the trainings** that have functional collectives captured in the graph, not only 2D parallel (2D parallel was just the job that happened to have collectives captured in the TP region).

The root cause is that when doing inductor lowering, we are calling the comm analysis pass to get an estimated collective time for each collective node in the graph, and for each check of a collective node we are calling `get_gpu_type()`, which under the hood calls `torch.utils.collect_env.run` to get the GPU info. However, this call is super expensive! The reason is that it effectively spawns a new process and calls `nvidia-smi` to get the GPU info, so the cost is **linear** in the number of collective nodes in the graph.

see https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py#L75

The fix is to add an lru cache to the function, so that we only call this once and reuse the cached result afterwards (see the sketch below).
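
A hedged sketch of the fix (names are illustrative, not the actual inductor code):

```python
import functools
import subprocess

@functools.lru_cache(maxsize=1)
def get_gpu_type() -> str:
    # The expensive part: spawning a subprocess to query the GPU. With the
    # cache it runs once per process instead of once per collective node.
    out = subprocess.run(
        ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
        capture_output=True, text=True, check=False,
    )
    return out.stdout.strip()
```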

torchtitan benchmark shows:
* before this fix: 2D parallel + fp8 compile time: 6min +
* after this fix: 2D parallel + fp8 compile time: 2min 48s (more than 100% improvement)

There's more room to improve the compile time, but this PR fixes the biggest regression I've found so far.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128363
Approved by: https://github.com/yf225

(cherry picked from commit 8a09940a543d4c2fd23a5c78edbf1ac24d481b45)
2024-06-19 11:31:16 -04:00
86271445d6 [Inductor] Update Intel GPU Triton commit pin. (#124842) (#128615)
Update Intel triton for Pytorch 2.4 release.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/124842
Approved by: https://github.com/EikanWang

(cherry picked from commit cf7adc2fa1c5c3b8e8cc5464a03823b6752958ad)
2024-06-19 10:31:08 -04:00
d71de3c95c Revert "Make torch_geometric models compatible with export (#123403)"… (#128511)
Revert "Make torch_geometric models compatible with export (#123403)" (#128377)

This reverts commit d78991a7381adb3df5e9b63c365db4506643edce.

This PR reverts https://github.com/pytorch/pytorch/pull/123403 to fix the performance regression as discussed in https://github.com/pytorch/pytorch/issues/127513#issuecomment-2158835653.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128377
Approved by: https://github.com/jgong5, https://github.com/angelayi, https://github.com/desertfire

(cherry picked from commit 5ef70faaa76364a73cd7f9da2d3f8e23da218b02)
2024-06-19 10:28:01 -04:00
e7dde73d43 [custom_op] stop using nonlocals to store information (#128547) (#128616)
Fixes https://github.com/pytorch/pytorch/issues/128544
Fixes https://github.com/pytorch/pytorch/issues/128535

We had a problem with multithreading where the nonlocals were being
clobbered. We originally stored these nonlocals because we wanted to
ferry information from an autograd.Function.apply to
autograd.Function.forward.

Our new approach is (sketched after this list):
- pass the information directly as an input to the
  autograd.Function.apply. This means that autograd.Function.forward
  will receive the information too.
- this messes up ctx.needs_input_grad, which has one element per input to
  forward. The user should not see the additional information we passed,
  so we fix this by temporarily overriding ctx.needs_input_grad to the
  right thing.
- this exposed a bug where ctx.needs_input_grad wasn't correct for
  TensorList inputs. This PR fixes that too.
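
A minimal sketch of the pattern, with hypothetical names (the real custom_op machinery is more involved):
```
import torch

class MyFunc(torch.autograd.Function):
    @staticmethod
    def forward(ctx, metadata, x):
        # `metadata` is ferried in as an extra leading input to apply()
        # instead of being stashed in a nonlocal, so it is thread-safe.
        ctx.metadata = metadata
        return x.clone()

    @staticmethod
    def backward(ctx, grad_out):
        # needs_input_grad has one slot per forward input, including the
        # extra metadata slot (slot 0), which must be skipped.
        x_needs_grad = ctx.needs_input_grad[1]
        return None, (grad_out if x_needs_grad else None)

x = torch.randn(3, requires_grad=True)
out = MyFunc.apply({"reason": "example"}, x)
out.sum().backward()
```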

Test Plan:
- existing and new tests
Pull Request resolved: https://github.com/pytorch/pytorch/pull/128547
Approved by: https://github.com/williamwen42, https://github.com/soulitzer
2024-06-19 10:23:12 -04:00
9ad8a5b657 Clean up xpu ut to make CI happy (#128383) (#128614)
# Motivation
Before #127611 was merged, the xpu-specific UT `test/test_xpu.py` was temporarily skipped. This PR fixes the UT bug introduced by #127741.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128383
Approved by: https://github.com/EikanWang

(cherry picked from commit 88974fedd06889bde8d1da297aa2bd10106f7c24)

Co-authored-by: Yu, Guangye <guangye.yu@intel.com>
2024-06-19 09:03:30 -04:00
ed624a0483 Change Dynamo's custom ops warning message to be less spammy (#128456) (#128581)
This is a short-term fix (for 2.4). In the longer term we should
fix https://github.com/pytorch/pytorch/issues/128430

The problem is that warnings.warn calls inside Dynamo print every
time. Python warnings are supposed to print once unless their cache is
reset, and Dynamo ends up resetting that cache every time it runs.

As a workaround we provide our own warn_once cache that is keyed on the
warning msg. I am not worried about this increasing memory usage,
because that's effectively what Python's warnings.warn cache does.
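
A minimal sketch of such a warn_once cache (hypothetical helper, not the exact Dynamo code):
```
import warnings

_warned_msgs = set()

def warn_once(msg, category=UserWarning):
    # Unlike bare warnings.warn, this cache is never reset, so each
    # distinct message prints at most once per process.
    if msg in _warned_msgs:
        return
    _warned_msgs.add(msg)
    warnings.warn(msg, category, stacklevel=2)
```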

Test Plan:
- fix tests.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128456
Approved by: https://github.com/anijain2305
2024-06-19 08:56:23 -04:00
082c4f7e64 [inductor] fix linear add bias pattern (#128473) (#128577)
Fix https://github.com/pytorch/pytorch/issues/128287.
Previously, the assertions in `linear_add_bias` were pretty fragile:
```
assert packed_weight_node.name == "_reorder_linear_weight"
assert transpose_weight_node.name == "permute_default"
```
because the `name` can change to `_reorder_linear_weight_id`, `permute_default_id` if we have more than one reorder/permute.

Checking `target` instead of `name` solves this issue.

The UT is also updated to match more than one `linear_add_bias` pattern to cover this case.
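
An illustrative FX snippet (not the actual fusion pass) showing why matching on `target` is robust where matching on `name` is not:
```
import torch
from torch.fx import symbolic_trace

class M(torch.nn.Module):
    def forward(self, x, w):
        # Two permutes force FX to suffix the second node's name.
        return x.permute(1, 0) + w.permute(1, 0)

gm = symbolic_trace(M())
for node in gm.graph.nodes:
    if node.op == "call_method" and node.target == "permute":
        # node.name is "permute" for the first match but "permute_1" for
        # the second; node.target stays "permute" for both.
        print(node.name, node.target)
```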

Co-authored-by: Jiong Gong <jiong.gong@intel.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/128473
Approved by: https://github.com/jgong5

(cherry picked from commit c53d65b3d3d5897c50d622acdd604ddfa8f57687)
2024-06-19 08:55:02 -04:00
459e2aa454 Revert "[cuDNN][SDPA] Remove TORCH_CUDNN_SDPA_ENABLED=1, enable cuDNN SDPA by default on H100 and 2nd on other archs >= sm80 (#125343)" (#128539)
This reverts commit 4c971932e839fc5da2b91906ad028d4654932bca.
2024-06-18 18:07:58 -07:00
6be0234f07 Revert "Deprecate torch._utils.is_compiling() and torch._dynamo.external_utils.is_compiling() (#127690)" (#128542)
This reverts commit 348b181a97abc2e636a6c18e5880a78e5d1dab94.
2024-06-18 18:07:35 -07:00
24a3885ef6 Revert "Set simdlen based on ATEN_CPU_CAPABILITY (#123514)" (#128541)
This reverts commit b66e3f0957b96b058c9b632ca60833d9717a9d8a because it was reverted on main.
2024-06-18 18:07:07 -07:00
62417c6ca9 [dynamo] Fix for #127696 (#128530)
[dynamo] Fix for #127696 (#128358)

Test Plan:
`buck2 test @//mode/dev-nosan //executorch/exir/backend/...`
https://www.internalfb.com/intern/testinfra/testrun/12666373989243932

Differential Revision: D58384518

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128358
Approved by: https://github.com/ydwu4

(cherry picked from commit 4345d98663d31f23492cafc0062f515a47d96a78)

Co-authored-by: Angela Yi <angelayi@meta.com>
2024-06-18 18:54:20 -04:00
1cd41997e9 [Release 2.4] Release only changes - use pinned triton. (#128388)
[Release 2.4] Release only changes - use pinned triton version
2024-06-10 23:19:21 -04:00
c85e2cacd3 [Release 2.4] Release only changes (#128347)
* Release 2.4 - release only changes

* more required changes

* fix

* temp changes for triton release

* fix_lint
2024-06-10 18:37:41 -04:00
232 changed files with 3267 additions and 1848 deletions

View File

@ -1 +1 @@
01cbe5045a6898c9a925f01435c8277b2fe6afcc
21eae954efa5bf584da70324b640288c3ee7aede

View File

@ -1 +1 @@
b8c64f64c18d8cac598b3adb355c21e7439c21de
aac14a3b93f11d781d1d5ebc5400b15ae8df5185

View File

@ -178,7 +178,7 @@ function install_torchrec_and_fbgemm() {
function clone_pytorch_xla() {
if [[ ! -d ./xla ]]; then
git clone --recursive --quiet https://github.com/pytorch/xla.git
git clone --recursive -b r2.4 https://github.com/pytorch/xla.git
pushd xla
# pin the xla hash so that we don't get broken by changes to xla
git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"

View File

@ -75,9 +75,8 @@ export PYTORCH_BUILD_NUMBER=1
TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64' and python_version < '3.13'"
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
# Only linux Python < 3.13 are supported wheels for triton
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64' and python_version < '3.13'"
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt)
@ -87,11 +86,11 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:
fi
# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton rocm package
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then
TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}"
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == "Linux" ]]; then
TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-rocm.txt)
TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}"
TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
fi
if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}"

View File

@ -25,6 +25,15 @@ if [[ "${DRY_RUN}" = "disabled" ]]; then
AWS_S3_CP="aws s3 cp"
fi
if [[ "${USE_SPLIT_BUILD:-false}" == "true" ]]; then
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_pkg"
fi
# this is special build with all dependencies packaged
if [[ ${BUILD_NAME} == *-full* ]]; then
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_full"
fi
# Sleep 2 minutes between retries for conda upload
retry () {
"$@" || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@")

View File

@ -1 +1 @@
d6015d42d9a1834bc7595c4bd6852562fb80b30b
23512dbebd44a11eb84afbf53c3c071dd105297e

View File

@ -1 +1 @@
6f0b61e5d782913a0fc7743812f2a8e522189111
r2.4

View File

@ -4,4 +4,4 @@ ninja=1.10.2
numpy=1.23.3
pyyaml=6.0
setuptools=68.2.2
typing-extensions=4.9.0
typing-extensions=4.11.0

View File

@ -93,6 +93,8 @@ done
# Copy Include Files
cp -r $ROCM_HOME/include/hip $TRITON_ROCM_DIR/include
cp -r $ROCM_HOME/include/roctracer $TRITON_ROCM_DIR/include
cp -r $ROCM_HOME/include/hsa $TRITON_ROCM_DIR/include
# Copy linker
mkdir -p $TRITON_ROCM_DIR/llvm/bin

View File

@ -38,9 +38,9 @@ SUPPORTED_PERIODICAL_MODES: Dict[str, Callable[[Optional[str]], bool]] = {
}
# The link to the published list of disabled jobs
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json"
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=tIl0Qo224T_NDVw0dtG4hU1cZJM97inV"
# and unstable jobs
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json"
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=GPyRZRsOo26Gfk_WjAoNNxEMGXkIxIes"
# Some constants used to handle disabled and unstable jobs
JOB_NAME_SEP = "/"

View File

@ -347,10 +347,6 @@ def generate_wheels_matrix(
for python_version in python_versions:
for arch_version in arches:
gpu_arch_type = arch_type(arch_version)
# Disable py3.12 builds for ROCm because of triton dependency
# on llnl-hatchet, which doesn't have py3.12 wheels available
if gpu_arch_type == "rocm" and python_version == "3.12":
continue
gpu_arch_version = (
""
if arch_version == "cpu"
@ -390,6 +386,29 @@ def generate_wheels_matrix(
),
}
)
# Special build building to use on Colab. PyThon 3.10 for 12.1 CUDA
if (
arch_version != "cuda-aarch64"
and python_version == "3.10"
and arch_version == "12.1"
):
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"devtoolset": "",
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": "",
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace( # noqa: B950
".", "_"
),
}
)
else:
ret.append(
{

View File

@ -8,7 +8,7 @@
# NOTE: If testing pytorch/builder changes you can change this variable to change what pytorch/builder reference
# the binary builds will check out
{%- set builder_repo = "pytorch/builder" -%}
{%- set builder_branch = "main" -%}
{%- set builder_branch = "release/2.4" -%}
{%- macro concurrency(build_environment) -%}
concurrency:

View File

@ -113,8 +113,8 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"

View File

@ -81,8 +81,8 @@ jobs:
elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
fi
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
uses: nick-fields/retry@v2.8.2
if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}

View File

@ -65,8 +65,8 @@ jobs:
steps:
!{{ common.setup_ec2_windows() }}
!{{ set_runner_specific_vars() }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: Populate binary env
shell: bash
run: |
@ -105,8 +105,8 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: Populate binary env
shell: bash
run: |

View File

@ -37,7 +37,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false
@ -59,25 +59,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -141,5 +141,5 @@ jobs:
if: always()
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()

View File

@ -37,7 +37,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false
@ -59,25 +59,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -186,5 +186,5 @@ jobs:
if: always()
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()

View File

@ -42,7 +42,7 @@ jobs:
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false
@ -64,25 +64,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -92,7 +92,7 @@ jobs:
run: echo "IN_ARC_RUNNER=$([ -f /.inarc ] && echo true || echo false)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.4
if: ${{ inputs.cuda-version != 'cpu' && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
- name: Output disk space left
@ -201,5 +201,5 @@ jobs:
file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()

View File

@ -145,13 +145,13 @@ jobs:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
if: inputs.build_environment != 'linux-s390x-binary-manywheel'
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}
@ -181,7 +181,6 @@ jobs:
- name: Checkout PyTorch to pytorch dir
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -195,7 +194,7 @@ jobs:
- name: Checkout pytorch/builder to builder dir
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -221,7 +220,7 @@ jobs:
- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}
@ -278,7 +277,7 @@ jobs:
- name: Teardown Linux
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
- name: Chown workspace
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'

View File

@ -128,14 +128,14 @@ jobs:
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
if: inputs.build_environment != 'linux-s390x-binary-manywheel'
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}
# Setup the environment
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' || inputs.build_environment == 'linux-s390x-binary-manywheel' }}
@ -158,7 +158,6 @@ jobs:
- name: Checkout PyTorch to pytorch dir
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
@ -171,7 +170,7 @@ jobs:
- name: Checkout pytorch/builder to builder dir
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -202,12 +201,12 @@ jobs:
path: "${{ runner.temp }}/artifacts/"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.4
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}
- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' && inputs.build_environment != 'linux-s390x-binary-manywheel' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}
@ -217,7 +216,7 @@ jobs:
- name: Teardown Linux
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
- name: Chown workspace
if: always() && inputs.build_environment != 'linux-s390x-binary-manywheel'

View File

@ -95,7 +95,7 @@ jobs:
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
no-sudo: true

View File

@ -23,7 +23,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false
@ -44,7 +44,7 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Set up JDK 8
uses: actions/setup-java@v3
@ -53,7 +53,7 @@ jobs:
distribution: 'temurin'
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.4
with:
python-version: 3.8
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}

View File

@ -80,7 +80,7 @@ jobs:
name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@ -91,7 +91,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup Linux
uses: ./.github/actions/setup-linux
@ -106,12 +106,12 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -218,5 +218,5 @@ jobs:
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()

View File

@ -46,7 +46,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false
@ -80,7 +80,7 @@ jobs:
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Populate CI build options
shell: bash
@ -102,7 +102,7 @@ jobs:
brew install libtool
- name: Setup miniconda for iOS
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.4
with:
python-version: "3.9"
environment-file: .github/requirements/conda-env-iOS.txt

View File

@ -81,7 +81,7 @@ jobs:
test-matrix: ${{ steps.linux-build.outputs.test-matrix }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -90,7 +90,7 @@ jobs:
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Linux Build
id: linux-build

View File

@ -86,7 +86,7 @@ jobs:
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Linux Build
id: linux-build

View File

@ -90,7 +90,7 @@ jobs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -99,7 +99,7 @@ jobs:
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup Linux
uses: ./.github/actions/setup-linux
@ -114,7 +114,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image-name }}
@ -128,7 +128,7 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -238,5 +238,5 @@ jobs:
s3-bucket: ${{ inputs.s3-bucket }}
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()

View File

@ -67,7 +67,7 @@ jobs:
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Linux Test
id: linux-test

View File

@ -68,7 +68,7 @@ jobs:
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Linux Test
id: linux-test

View File

@ -67,7 +67,7 @@ jobs:
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
if: ${{ !contains(matrix.runner, 'gcp.a100') }}
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -76,7 +76,7 @@ jobs:
docker exec -it $(docker container ps --format '{{.ID}}') bash
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup Linux
uses: ./.github/actions/setup-linux
@ -91,7 +91,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image }}
@ -105,7 +105,7 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -116,7 +116,7 @@ jobs:
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.4
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_arc_runner.outputs.IN_ARC_RUNNER == 'false' }}
- name: Lock NVIDIA A100 40GB Frequency
@ -333,7 +333,7 @@ jobs:
path: ./**/core.[1-9]*
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()
# NB: We are currently having an intermittent GPU-related issue on G5 runners with

View File

@ -71,11 +71,11 @@ jobs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
steps:
- name: Clean up disk space before running MacOS workflow
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.4
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Set xcode version
env:
@ -87,7 +87,7 @@ jobs:
- name: Setup miniconda
if: inputs.environment-file == ''
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.4
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -97,7 +97,7 @@ jobs:
# environment even though the arch is x86-64
- name: Setup miniconda using the provided environment file
if: inputs.environment-file != ''
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.4
with:
python-version: ${{ inputs.python-version }}
environment-file: ${{ inputs.environment-file }}
@ -207,4 +207,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.4

View File

@ -40,7 +40,7 @@ jobs:
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
@ -81,7 +81,7 @@ jobs:
use-gha: true
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.4
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -159,4 +159,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.4

View File

@ -74,11 +74,11 @@ jobs:
done
- name: Clean up disk space before running MacOS workflow
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.4
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Download build artifacts
uses: ./.github/actions/download-build-artifacts
@ -93,7 +93,7 @@ jobs:
use-gha: true
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.4
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -216,4 +216,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.4

View File

@ -58,7 +58,7 @@ jobs:
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
no-sudo: true
@ -80,12 +80,12 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

View File

@ -23,7 +23,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false
@ -54,10 +54,10 @@ jobs:
SUPPORT_ABI: '${{ matrix.support_abi }}'
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.4
with:
python-version: 3.8
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}.txt

View File

@ -32,7 +32,7 @@ jobs:
USERNAME: ${{ inputs.user_name }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: true

View File

@ -60,10 +60,10 @@ jobs:
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
uses: pytorch/test-infra/.github/actions/cleanup-runner@main
uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.4
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@ -78,7 +78,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
no-sudo: true

View File

@ -54,10 +54,10 @@ jobs:
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
uses: pytorch/test-infra/.github/actions/cleanup-runner@main
uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.4
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@ -73,7 +73,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
no-sudo: true

View File

@ -54,7 +54,7 @@ jobs:
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup XPU
uses: ./.github/actions/setup-xpu
@ -72,12 +72,12 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

View File

@ -3,7 +3,7 @@ name: Build Triton wheels
on:
push:
branches:
- main
- release/2.4
tags:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
@ -47,12 +47,12 @@ jobs:
BUILD_DEVICE: ${{ matrix.device }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
@ -60,7 +60,7 @@ jobs:
uses: ./.github/actions/setup-linux
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ env.DOCKER_IMAGE }}
@ -124,7 +124,7 @@ jobs:
path: ${{ runner.temp }}/artifacts/*
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()
upload-wheel:
@ -209,12 +209,12 @@ jobs:
PY_VERS: ${{ matrix.py_vers }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
@ -222,7 +222,7 @@ jobs:
uses: ./.github/actions/setup-linux
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ env.DOCKER_IMAGE }}
@ -257,7 +257,7 @@ jobs:
path: ${{ runner.temp }}/artifacts/*
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()
upload-conda:

View File

@ -31,7 +31,7 @@ jobs:
runs-on: linux.20_04.4x
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
fetch-depth: 1

View File

@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Run close_nonexistent_disable_issues.py
env:

View File

@ -5,6 +5,11 @@ on:
branches:
- main
- release/*
tags:
# Final Release tags look like: v1.11.0
- v[0-9]+.[0-9]+.[0-9]+
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
release:
types: [published]
pull_request:
@ -18,6 +23,8 @@ jobs:
# https://github.com/softprops/action-gh-release?tab=readme-ov-file#permissions
permissions:
contents: write
outputs:
pt_release_name: ${{ steps.release_name.outputs.pt_release_name }}
steps:
- uses: malfet/checkout@silent-checkout
with:
@ -49,11 +56,44 @@ jobs:
# Create archive
tar -czf "$PT_RELEASE_FILE" "$PT_RELEASE_NAME"
echo "Created source archive $PT_RELEASE_FILE with content: $(ls -a "$PT_RELEASE_NAME")"
- name: Upload source distribution
- name: Upload source distribution for release
if: ${{ github.event_name == 'release' }}
uses: softprops/action-gh-release@v1
with:
files: ${{env.PT_RELEASE_FILE}}
- name: Upload source distribution to GHA artifacts for release tags
if: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.PT_RELEASE_FILE }}
path: ${{ env.PT_RELEASE_FILE }}
- name: Set output
id: release_name
run: echo "::set-output name=pt_release_name::${{ env.PT_RELEASE_NAME }}.tar.gz"
upload_source_code_to_s3:
if: ${{ github.repository == 'pytorch/pytorch' && github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && contains(github.ref, 'rc') }}
runs-on: linux.2xlarge
environment: sourcecode-upload
name: Upload source code to S3 for release tags
permissions:
id-token: write
needs: release
steps:
- uses: actions/download-artifact@v2
with:
name: ${{ needs.release.outputs.pt_release_name }}
- name: Configure AWS credentials(PyTorch account)
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_pytorch_source_code_upload_role
aws-region: us-east-1
- uses: seemethere/upload-artifact-s3@v5
with:
s3-bucket: pytorch
s3-prefix: source_code/test
if-no-files-found: warn
path: ${{ needs.release.outputs.pt_release_name }}
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name }}

View File

@ -78,21 +78,21 @@ jobs:
# [see note: pytorch repo ref]
# deep clone (fetch-depth 0) required for git merge-base
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Build docker image
id: build-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: ${{ matrix.docker-image-name }}
always-rebuild: true
push: true
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.build-docker-image.outputs.docker-image }}
@ -124,5 +124,5 @@ jobs:
if: always()
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()

View File

@ -41,7 +41,7 @@ jobs:
matrix: ${{ steps.generate-matrix.outputs.matrix }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: true
@ -69,7 +69,7 @@ jobs:
CUDNN_VERSION: ${{ matrix.cudnn_version }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.4
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
@ -147,12 +147,12 @@ jobs:
fi
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()
validate:
needs: build
uses: pytorch/builder/.github/workflows/validate-docker-images.yml@main
uses: pytorch/builder/.github/workflows/validate-docker-images.yml@release/2.4
with:
channel: nightly
ref: main

View File

@ -48,7 +48,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.8"
runs_on: linux.arm64.m7g.4xlarge
ALPINE_IMAGE: "arm64v8/alpine"
@ -69,7 +69,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
@ -91,7 +91,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cpu-aarch64
secrets:
@ -111,7 +111,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.8"
runs_on: linux.arm64.m7g.4xlarge
@ -135,7 +135,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda-aarch64
@ -156,7 +156,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.9"
runs_on: linux.arm64.m7g.4xlarge
ALPINE_IMAGE: "arm64v8/alpine"
@ -177,7 +177,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
@ -199,7 +199,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-aarch64
secrets:
@ -219,7 +219,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.9"
runs_on: linux.arm64.m7g.4xlarge
@ -243,7 +243,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cuda-aarch64
@ -264,7 +264,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.10"
runs_on: linux.arm64.m7g.4xlarge
ALPINE_IMAGE: "arm64v8/alpine"
@ -285,7 +285,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
@ -307,7 +307,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-aarch64
secrets:
@ -327,7 +327,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.10"
runs_on: linux.arm64.m7g.4xlarge
@ -351,7 +351,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cuda-aarch64
@ -372,7 +372,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.11"
runs_on: linux.arm64.m7g.4xlarge
ALPINE_IMAGE: "arm64v8/alpine"
@ -393,7 +393,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
@ -415,7 +415,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-aarch64
secrets:
@ -435,7 +435,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.11"
runs_on: linux.arm64.m7g.4xlarge
@ -459,7 +459,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cuda-aarch64
@ -480,7 +480,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.12"
runs_on: linux.arm64.m7g.4xlarge
ALPINE_IMAGE: "arm64v8/alpine"
@ -501,7 +501,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-aarch64
build_environment: linux-aarch64-binary-manywheel
@ -523,7 +523,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-2.4
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-aarch64
secrets:
@ -543,7 +543,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.12"
runs_on: linux.arm64.m7g.4xlarge
@ -567,7 +567,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_TYPE: cuda-aarch64
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-2.4
DESIRED_DEVTOOLSET: cxx11-abi
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cuda-aarch64

View File

@ -48,7 +48,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cpu
build_environment: linux-binary-conda
@ -66,7 +66,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cpu
build_environment: linux-binary-conda
@ -87,7 +87,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cpu
secrets:
@ -108,7 +108,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.8"
runs_on: linux.24xlarge
build_name: conda-py3_8-cuda11_8
@ -128,7 +128,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cuda11_8
build_environment: linux-binary-conda
@ -150,7 +150,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cuda11_8
secrets:
@ -171,7 +171,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.8"
runs_on: linux.24xlarge
build_name: conda-py3_8-cuda12_1
@ -191,7 +191,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cuda12_1
build_environment: linux-binary-conda
@ -213,7 +213,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cuda12_1
secrets:
@ -234,7 +234,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.8"
runs_on: linux.24xlarge
build_name: conda-py3_8-cuda12_4
@ -254,7 +254,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cuda12_4
build_environment: linux-binary-conda
@ -276,7 +276,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cuda12_4
secrets:
@ -296,7 +296,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cpu
build_environment: linux-binary-conda
@ -314,7 +314,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cpu
build_environment: linux-binary-conda
@ -335,7 +335,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cpu
secrets:
@ -356,7 +356,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.9"
runs_on: linux.24xlarge
build_name: conda-py3_9-cuda11_8
@ -376,7 +376,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cuda11_8
build_environment: linux-binary-conda
@ -398,7 +398,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cuda11_8
secrets:
@ -419,7 +419,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.9"
runs_on: linux.24xlarge
build_name: conda-py3_9-cuda12_1
@ -439,7 +439,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cuda12_1
build_environment: linux-binary-conda
@ -461,7 +461,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cuda12_1
secrets:
@ -482,7 +482,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.9"
runs_on: linux.24xlarge
build_name: conda-py3_9-cuda12_4
@ -502,7 +502,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cuda12_4
build_environment: linux-binary-conda
@ -524,7 +524,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cuda12_4
secrets:
@ -544,7 +544,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cpu
build_environment: linux-binary-conda
@ -562,7 +562,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cpu
build_environment: linux-binary-conda
@ -583,7 +583,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cpu
secrets:
@ -604,7 +604,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.10"
runs_on: linux.24xlarge
build_name: conda-py3_10-cuda11_8
@ -624,7 +624,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cuda11_8
build_environment: linux-binary-conda
@ -646,7 +646,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cuda11_8
secrets:
@ -667,7 +667,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.10"
runs_on: linux.24xlarge
build_name: conda-py3_10-cuda12_1
@ -687,7 +687,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cuda12_1
build_environment: linux-binary-conda
@ -709,7 +709,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cuda12_1
secrets:
@ -730,7 +730,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.10"
runs_on: linux.24xlarge
build_name: conda-py3_10-cuda12_4
@ -750,7 +750,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cuda12_4
build_environment: linux-binary-conda
@ -772,7 +772,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cuda12_4
secrets:
@ -792,7 +792,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cpu
build_environment: linux-binary-conda
@ -810,7 +810,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cpu
build_environment: linux-binary-conda
@ -831,7 +831,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cpu
secrets:
@ -852,7 +852,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.11"
runs_on: linux.24xlarge
build_name: conda-py3_11-cuda11_8
@ -872,7 +872,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cuda11_8
build_environment: linux-binary-conda
@ -894,7 +894,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cuda11_8
secrets:
@ -915,7 +915,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.11"
runs_on: linux.24xlarge
build_name: conda-py3_11-cuda12_1
@ -935,7 +935,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cuda12_1
build_environment: linux-binary-conda
@ -957,7 +957,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cuda12_1
secrets:
@ -978,7 +978,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.11"
runs_on: linux.24xlarge
build_name: conda-py3_11-cuda12_4
@ -998,7 +998,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cuda12_4
build_environment: linux-binary-conda
@ -1020,7 +1020,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cuda12_4
secrets:
@ -1040,7 +1040,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cpu
build_environment: linux-binary-conda
@ -1058,7 +1058,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cpu
build_environment: linux-binary-conda
@ -1079,7 +1079,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cpu
secrets:
@ -1100,7 +1100,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.12"
runs_on: linux.24xlarge
build_name: conda-py3_12-cuda11_8
@ -1120,7 +1120,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cuda11_8
build_environment: linux-binary-conda
@ -1142,7 +1142,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/conda-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cuda11_8
secrets:
@ -1163,7 +1163,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.12"
runs_on: linux.24xlarge
build_name: conda-py3_12-cuda12_1
@ -1183,7 +1183,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cuda12_1
build_environment: linux-binary-conda
@ -1205,7 +1205,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cuda12_1
secrets:
@ -1226,7 +1226,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.12"
runs_on: linux.24xlarge
build_name: conda-py3_12-cuda12_4
@ -1246,7 +1246,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cuda12_4
build_environment: linux-binary-conda
@ -1268,7 +1268,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cuda12_4
secrets:

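The change repeated throughout this workflow file (and the ones that follow) is a re-pin of the builder Docker images from the rolling `-main` tag, which tracks the main branch, to the release-pinned `-2.4` tag. A minimal sketch of one affected job, using only the fields visible in the hunks above (the job name is illustrative; the real jobs carry additional generated keys):

```
conda-py3_12-cuda12_4-build:
  with:
    DESIRED_CUDA: cu124
    GPU_ARCH_VERSION: 12.4
    GPU_ARCH_TYPE: cuda
    # before: pytorch/conda-builder:cuda12.4-main  (tracks main, keeps moving)
    DOCKER_IMAGE: pytorch/conda-builder:cuda12.4-2.4  # pinned for the 2.4 release
    DESIRED_PYTHON: "3.12"
    build_name: conda-py3_12-cuda12_4
    build_environment: linux-binary-conda
```

Pinning the tag keeps release-branch binary builds reproducible even as the `-main` images continue to advance.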
View File

@ -43,7 +43,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cpu-shared-with-deps-cxx11-abi
@ -62,7 +62,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cpu-shared-with-deps-cxx11-abi

View File

@ -48,7 +48,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cpu-shared-with-deps-cxx11-abi
@ -67,7 +67,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cpu-shared-with-deps-cxx11-abi
@ -89,7 +89,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cpu-shared-with-deps-cxx11-abi
@ -111,7 +111,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda11_8-shared-with-deps-cxx11-abi
@ -131,7 +131,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda11_8-shared-with-deps-cxx11-abi
@ -154,7 +154,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda11.8-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda11_8-shared-with-deps-cxx11-abi
@ -176,7 +176,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda12_1-shared-with-deps-cxx11-abi
@ -196,7 +196,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda12_1-shared-with-deps-cxx11-abi
@ -219,7 +219,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda12_1-shared-with-deps-cxx11-abi
@ -241,7 +241,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda12_4-shared-with-deps-cxx11-abi
@ -261,7 +261,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda12_4-shared-with-deps-cxx11-abi
@ -284,7 +284,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cuda12.4-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cuda12_4-shared-with-deps-cxx11-abi
@ -306,7 +306,7 @@ jobs:
DESIRED_CUDA: rocm6.0
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.0-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.0-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-rocm6_0-shared-with-deps-cxx11-abi
@ -328,7 +328,7 @@ jobs:
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.0-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.0-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
steps:
@ -342,7 +342,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -354,7 +353,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -370,7 +369,7 @@ jobs:
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/libtorch-cxx11-builder:rocm6.0-main
docker-image: pytorch/libtorch-cxx11-builder:rocm6.0-2.4
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
@ -390,7 +389,7 @@ jobs:
DESIRED_CUDA: rocm6.0
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.0-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.0-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-rocm6_0-shared-with-deps-cxx11-abi
@ -412,7 +411,7 @@ jobs:
DESIRED_CUDA: rocm6.1
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-rocm6_1-shared-with-deps-cxx11-abi
@ -434,7 +433,7 @@ jobs:
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
steps:
@ -448,7 +447,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -460,7 +458,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -476,7 +474,7 @@ jobs:
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/libtorch-cxx11-builder:rocm6.1-main
docker-image: pytorch/libtorch-cxx11-builder:rocm6.1-2.4
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
@ -496,7 +494,7 @@ jobs:
DESIRED_CUDA: rocm6.1
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:rocm6.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-rocm6_1-shared-with-deps-cxx11-abi

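The other recurring edit, visible in the ROCm test jobs above, touches the two checkout steps: the explicit `ref:` override on the PyTorch checkout is dropped (so the action presumably falls back to the commit that triggered the workflow), and the `pytorch/builder` checkout moves from the rolling `main` branch to the `release/2.4` branch. A sketch of the resulting steps, assembled from the fields shown in the hunks:

```
- name: Checkout PyTorch
  uses: malfet/checkout@silent-checkout
  with:
    # the "ref: ${{ github.event_name == 'pull_request' && ... }}" line is removed;
    # without it, the action checks out the workflow's own commit by default
    submodules: recursive
    path: pytorch
    quiet-checkout: true
- name: Checkout pytorch/builder
  uses: malfet/checkout@silent-checkout
  with:
    ref: release/2.4   # was: ref: main
    submodules: recursive
    repository: pytorch/builder
    path: builder
```

Note that the helper action reference itself stays on `pytorch/test-infra/.github/actions/pull-docker-image@main`; only the image it pulls (e.g. `docker-image: pytorch/libtorch-cxx11-builder:rocm6.1-2.4`) is release-pinned.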
View File

@ -43,7 +43,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cpu-shared-with-deps-pre-cxx11
@ -62,7 +62,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cpu-shared-with-deps-pre-cxx11

View File

@ -48,7 +48,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cpu-shared-with-deps-pre-cxx11
@ -67,7 +67,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cpu-shared-with-deps-pre-cxx11
@ -89,7 +89,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cpu-shared-with-deps-pre-cxx11
@ -111,7 +111,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda11_8-shared-with-deps-pre-cxx11
@ -131,7 +131,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda11_8-shared-with-deps-pre-cxx11
@ -154,7 +154,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda11_8-shared-with-deps-pre-cxx11
@ -176,7 +176,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda12_1-shared-with-deps-pre-cxx11
@ -196,7 +196,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda12_1-shared-with-deps-pre-cxx11
@ -219,7 +219,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda12_1-shared-with-deps-pre-cxx11
@ -241,7 +241,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda12_4-shared-with-deps-pre-cxx11
@ -261,7 +261,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda12_4-shared-with-deps-pre-cxx11
@ -284,7 +284,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-cuda12_4-shared-with-deps-pre-cxx11
@ -306,7 +306,7 @@ jobs:
DESIRED_CUDA: rocm6.0
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-main
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-rocm6_0-shared-with-deps-pre-cxx11
@ -328,7 +328,7 @@ jobs:
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-main
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
steps:
@ -342,7 +342,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -354,7 +353,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -370,7 +369,7 @@ jobs:
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/manylinux-builder:rocm6.0-main
docker-image: pytorch/manylinux-builder:rocm6.0-2.4
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
@ -390,7 +389,7 @@ jobs:
DESIRED_CUDA: rocm6.0
GPU_ARCH_VERSION: 6.0
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-main
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.0-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-rocm6_0-shared-with-deps-pre-cxx11
@ -412,7 +411,7 @@ jobs:
DESIRED_CUDA: rocm6.1
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-rocm6_1-shared-with-deps-pre-cxx11
@ -434,7 +433,7 @@ jobs:
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
SKIP_ALL_TESTS: 1
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
steps:
@ -448,7 +447,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -460,7 +458,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -476,7 +474,7 @@ jobs:
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: pytorch/manylinux-builder:rocm6.1-main
docker-image: pytorch/manylinux-builder:rocm6.1-2.4
- name: Test Pytorch binary
uses: ./pytorch/.github/actions/test-pytorch-binary
- name: Teardown ROCm
@ -496,7 +494,7 @@ jobs:
DESIRED_CUDA: rocm6.1
GPU_ARCH_VERSION: 6.1
GPU_ARCH_TYPE: rocm
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:rocm6.1-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: pre-cxx11
build_name: libtorch-rocm6_1-shared-with-deps-pre-cxx11

View File

@ -44,7 +44,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_8
build_environment: linux-binary-manywheel
@ -64,7 +64,7 @@ jobs:
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_8
build_environment: linux-binary-manywheel
@ -84,7 +84,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_1
build_environment: linux-binary-manywheel
@ -104,7 +104,7 @@ jobs:
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_1
build_environment: linux-binary-manywheel
@ -124,7 +124,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_4
build_environment: linux-binary-manywheel
@ -144,7 +144,7 @@ jobs:
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_4
build_environment: linux-binary-manywheel

File diff suppressed because it is too large

View File

@ -48,7 +48,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.8"
runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -69,7 +69,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cpu-s390x
build_environment: linux-s390x-binary-manywheel
@ -91,7 +91,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cpu-s390x
secrets:
@ -111,7 +111,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.9"
runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -132,7 +132,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-s390x
build_environment: linux-s390x-binary-manywheel
@ -154,7 +154,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.9"
build_name: manywheel-py3_9-cpu-s390x
secrets:
@ -174,7 +174,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.10"
runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -195,7 +195,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-s390x
build_environment: linux-s390x-binary-manywheel
@ -217,7 +217,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.10"
build_name: manywheel-py3_10-cpu-s390x
secrets:
@ -237,7 +237,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.11"
runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -258,7 +258,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-s390x
build_environment: linux-s390x-binary-manywheel
@ -280,7 +280,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.11"
build_name: manywheel-py3_11-cpu-s390x
secrets:
@ -300,7 +300,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.12"
runs_on: linux.s390x
ALPINE_IMAGE: "docker.io/s390x/alpine"
@ -321,7 +321,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-s390x
build_environment: linux-s390x-binary-manywheel
@ -343,7 +343,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu-s390x
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-main
DOCKER_IMAGE: pytorch/manylinuxs390x-builder:cpu-s390x-2.4
DESIRED_PYTHON: "3.12"
build_name: manywheel-py3_12-cpu-s390x
secrets:

View File

@ -77,7 +77,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -89,7 +88,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -141,7 +140,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.8"
build_name: conda-py3_8-cpu
use_s3: False
@ -195,7 +194,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -207,7 +205,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -259,7 +257,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.9"
build_name: conda-py3_9-cpu
use_s3: False
@ -313,7 +311,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -325,7 +322,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -377,7 +374,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.10"
build_name: conda-py3_10-cpu
use_s3: False
@ -431,7 +428,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -443,7 +439,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -495,7 +491,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.11"
build_name: conda-py3_11-cpu
use_s3: False
@ -549,7 +545,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -561,7 +556,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -613,7 +608,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/conda-builder:cpu-main
DOCKER_IMAGE: pytorch/conda-builder:cpu-2.4
DESIRED_PYTHON: "3.12"
build_name: conda-py3_12-cpu
use_s3: False

View File

@ -81,7 +81,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -93,7 +92,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -145,7 +144,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-main
DOCKER_IMAGE: pytorch/libtorch-cxx11-builder:cpu-2.4
LIBTORCH_VARIANT: shared-with-deps
DESIRED_DEVTOOLSET: cxx11-abi
build_name: libtorch-cpu-shared-with-deps-cxx11-abi

View File

@ -78,7 +78,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -90,7 +89,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -142,7 +141,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
DESIRED_PYTHON: "3.8"
build_name: wheel-py3_8-cpu
use_s3: False
@ -197,7 +196,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -209,7 +207,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -261,7 +259,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
DESIRED_PYTHON: "3.9"
build_name: wheel-py3_9-cpu
use_s3: False
@ -316,7 +314,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -328,7 +325,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -380,7 +377,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
DESIRED_PYTHON: "3.10"
build_name: wheel-py3_10-cpu
use_s3: False
@ -435,7 +432,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -447,7 +443,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -499,7 +495,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
DESIRED_PYTHON: "3.11"
build_name: wheel-py3_11-cpu
use_s3: False
@ -554,7 +550,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -566,7 +561,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -618,7 +613,7 @@ jobs:
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cpu
GPU_ARCH_TYPE: cpu
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main
DOCKER_IMAGE: pytorch/manylinux-builder:cpu-2.4
DESIRED_PYTHON: "3.12"
build_name: wheel-py3_12-cpu
use_s3: False

View File

@ -93,7 +93,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -105,7 +104,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -210,7 +209,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -222,7 +220,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -336,7 +334,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -348,7 +345,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -454,7 +451,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -466,7 +462,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -581,7 +577,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -593,7 +588,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -699,7 +694,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -711,7 +705,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -826,7 +820,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -838,7 +831,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -944,7 +937,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -956,7 +948,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1070,7 +1062,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1082,7 +1073,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1187,7 +1178,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1199,7 +1189,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1313,7 +1303,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1325,7 +1314,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1431,7 +1420,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1443,7 +1431,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1558,7 +1546,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1570,7 +1557,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1676,7 +1663,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1688,7 +1674,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1803,7 +1789,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1815,7 +1800,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1921,7 +1906,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1933,7 +1917,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2047,7 +2031,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2059,7 +2042,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2164,7 +2147,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2176,7 +2158,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2290,7 +2272,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2302,7 +2283,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2408,7 +2389,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2420,7 +2400,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2535,7 +2515,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2547,7 +2526,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2653,7 +2632,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2665,7 +2643,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2780,7 +2758,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2792,7 +2769,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2898,7 +2875,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2910,7 +2886,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3024,7 +3000,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3036,7 +3011,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3141,7 +3116,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3153,7 +3127,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3267,7 +3241,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3279,7 +3252,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3385,7 +3358,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3397,7 +3369,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3512,7 +3484,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3524,7 +3495,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3630,7 +3601,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3642,7 +3612,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3757,7 +3727,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3769,7 +3738,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3875,7 +3844,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3887,7 +3855,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4001,7 +3969,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4013,7 +3980,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4118,7 +4085,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4130,7 +4096,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4244,7 +4210,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4256,7 +4221,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4362,7 +4327,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4374,7 +4338,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4489,7 +4453,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4501,7 +4464,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4607,7 +4570,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4619,7 +4581,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4734,7 +4696,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4746,7 +4707,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4852,7 +4813,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4864,7 +4824,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder

View File

@ -90,7 +90,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -102,7 +101,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -211,7 +210,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -223,7 +221,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder

View File

@ -97,7 +97,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -109,7 +108,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -218,7 +217,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -230,7 +228,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -352,7 +350,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -364,7 +361,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -474,7 +471,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -486,7 +482,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -609,7 +605,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -621,7 +616,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -731,7 +726,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -743,7 +737,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -866,7 +860,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -878,7 +871,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -988,7 +981,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1000,7 +992,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder

View File

@ -90,7 +90,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -102,7 +101,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -211,7 +210,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -223,7 +221,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder

View File

@ -97,7 +97,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -109,7 +108,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -218,7 +217,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -230,7 +228,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -352,7 +350,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -364,7 +361,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -474,7 +471,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -486,7 +482,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -609,7 +605,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -621,7 +616,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -731,7 +726,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -743,7 +737,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -866,7 +860,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -878,7 +871,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -988,7 +981,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1000,7 +992,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder

View File

@ -94,7 +94,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -106,7 +105,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -211,7 +210,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -223,7 +221,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -338,7 +336,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -350,7 +347,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -456,7 +453,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -468,7 +464,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -584,7 +580,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -596,7 +591,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -702,7 +697,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -714,7 +708,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -830,7 +824,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -842,7 +835,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -948,7 +941,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -960,7 +952,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1075,7 +1067,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1087,7 +1078,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1192,7 +1183,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1204,7 +1194,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1319,7 +1309,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1331,7 +1320,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1437,7 +1426,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1449,7 +1437,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1565,7 +1553,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1577,7 +1564,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1683,7 +1670,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1695,7 +1681,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1811,7 +1797,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1823,7 +1808,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -1929,7 +1914,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -1941,7 +1925,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2056,7 +2040,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2068,7 +2051,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2173,7 +2156,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2185,7 +2167,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2300,7 +2282,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2312,7 +2293,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2418,7 +2399,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2430,7 +2410,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2546,7 +2526,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2558,7 +2537,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2664,7 +2643,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2676,7 +2654,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2792,7 +2770,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2804,7 +2781,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -2910,7 +2887,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -2922,7 +2898,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3037,7 +3013,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3049,7 +3024,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3154,7 +3129,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3166,7 +3140,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3281,7 +3255,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3293,7 +3266,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3399,7 +3372,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3411,7 +3383,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3527,7 +3499,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3539,7 +3510,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3645,7 +3616,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3657,7 +3627,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3773,7 +3743,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3785,7 +3754,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -3891,7 +3860,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -3903,7 +3871,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4018,7 +3986,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4030,7 +3997,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4135,7 +4102,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4147,7 +4113,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4262,7 +4228,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4274,7 +4239,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4380,7 +4345,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4392,7 +4356,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4508,7 +4472,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4520,7 +4483,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4626,7 +4589,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4638,7 +4600,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4754,7 +4716,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4766,7 +4727,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder
@ -4872,7 +4833,6 @@ jobs:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -4884,7 +4844,7 @@ jobs:
- name: Checkout pytorch/builder
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.4
submodules: recursive
repository: pytorch/builder
path: builder

View File

@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Run BC Lint Action
uses: pytorch/test-infra/.github/actions/bc-lint@main
uses: pytorch/test-infra/.github/actions/bc-lint@release/2.4
with:
repo: ${{ github.event.pull_request.head.repo.full_name }}
base_sha: ${{ github.event.pull_request.base.sha }}

View File

@ -16,7 +16,7 @@ permissions: read-all
# When any other step fails, its job will be retried once by retryBot.
jobs:
lintrunner-clang:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.4
with:
timeout: 120
runner: linux.2xlarge
@ -32,7 +32,7 @@ jobs:
.github/scripts/lintrunner.sh
lintrunner-noclang:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.4
with:
timeout: 120
runner: linux.2xlarge
@ -47,7 +47,7 @@ jobs:
.github/scripts/lintrunner.sh
quick-checks:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.4
with:
runner: linux.2xlarge
docker-image: pytorch-linux-focal-linter
@ -88,7 +88,7 @@ jobs:
if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'skip-pr-sanity-checks')
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
fetch-depth: -1
@ -101,7 +101,7 @@ jobs:
bash .github/scripts/pr-sanity-check.sh
workflow-checks:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.4
with:
runner: linux.2xlarge
docker-image: pytorch-linux-focal-linter
@ -112,6 +112,7 @@ jobs:
# The generic Linux job chooses to use base env, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
export RELEASE_VERSION_TAG="2.4"
# Regenerate workflows
.github/scripts/generate_ci_workflows.py
@ -137,7 +138,7 @@ jobs:
exit $RC
toc:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.4
with:
runner: linux.2xlarge
docker-image: pytorch-linux-focal-linter
@ -175,7 +176,7 @@ jobs:
test-tools:
name: Test tools
if: ${{ github.repository == 'pytorch/pytorch' }}
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job.yml@release/2.4
with:
runner: linux.2xlarge
docker-image: pytorch-linux-focal-linter
@ -196,7 +197,7 @@ jobs:
runs-on: linux.20_04.4x
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
fetch-depth: 1
@ -226,7 +227,7 @@ jobs:
# [see note: pytorch repo ref]
# deep clone (fetch-depth 0) required, to allow us to use git log
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
fetch-depth: 1

View File

@ -116,5 +116,5 @@ jobs:
AWS_REGION: ""
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()

View File

@ -21,7 +21,7 @@ jobs:
environment: upload-stats
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false

View File

@ -41,7 +41,7 @@ jobs:
environment: update-commit-hash
steps:
- name: update-vision-commit-hash
uses: pytorch/test-infra/.github/actions/update-commit-hash@main
uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.4
if: ${{ github.event_name == 'schedule' }}
with:
repo-name: vision
@ -56,7 +56,7 @@ jobs:
environment: update-commit-hash
steps:
- name: update-audio-commit-hash
uses: pytorch/test-infra/.github/actions/update-commit-hash@main
uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.4
if: ${{ github.event_name == 'schedule' }}
with:
repo-name: audio
@ -71,7 +71,7 @@ jobs:
environment: update-commit-hash
steps:
- name: update-executorch-commit-hash
uses: pytorch/test-infra/.github/actions/update-commit-hash@main
uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.4
if: ${{ github.event_name == 'schedule' }}
with:
repo-name: executorch

View File

@ -24,7 +24,7 @@ jobs:
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.4
with:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
working-directory: pytorch
@ -39,13 +39,13 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.4
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.4
- name: Clone CodeLlama
uses: actions/checkout@v3
@ -136,7 +136,7 @@ jobs:
"s3://target-determinator-assets/indexes/latest/${ZIP_NAME}"
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.4
if: always()
concurrency:

View File

@ -14,7 +14,7 @@ jobs:
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false

View File

@ -16,7 +16,7 @@ jobs:
environment: ${{ (github.event_name == 'schedule') && 'mergebot' || '' }}
steps:
- name: Update viable/strict
uses: pytorch/test-infra/.github/actions/update-viablestrict@main
uses: pytorch/test-infra/.github/actions/update-viablestrict@release/2.4
with:
repository: pytorch/pytorch
stable-branch: viable/strict

View File

@ -17,7 +17,7 @@ jobs:
contents: read
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false

View File

@ -44,7 +44,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
uses: pytorch/test-infra/.github/actions/upload-alerts@main
uses: pytorch/test-infra/.github/actions/upload-alerts@release/2.4
with:
alerts: '${{ steps.alert_creation_step.outputs.script-output }}'
organization: "pytorch"

View File

@ -39,7 +39,7 @@ jobs:
run: echo "${TRIGGERING_WORKFLOW}"
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
- uses: actions/setup-python@v4
with:

View File

@ -29,7 +29,7 @@ jobs:
name: Upload dynamo performance stats for ${{ github.event.workflow_run.id }}, attempt ${{ github.event.workflow_run.run_attempt }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
submodules: false
fetch-depth: 1

View File

@ -17,7 +17,7 @@ jobs:
environment: upload-stats
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.4
with:
fetch-depth: 1
submodules: false

View File

@ -21,7 +21,7 @@ jobs:
fetch-depth: 0
- name: update-xla-commit-hash
continue-on-error: true
uses: pytorch/test-infra/.github/actions/update-commit-hash@main
uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.4
with:
repo-name: xla
branch: master
@ -30,7 +30,7 @@ jobs:
updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
- name: update-triton-commit-hash
uses: pytorch/test-infra/.github/actions/update-commit-hash@main
uses: pytorch/test-infra/.github/actions/update-commit-hash@release/2.4
with:
repo-owner: openai
repo-name: triton

View File

@ -207,7 +207,7 @@ pip install -r requirements.txt
**On Linux**
```bash
conda install intel::mkl-static intel::mkl-include
pip install mkl-static mkl-include
# CUDA only: Add LAPACK support for the GPU if needed
conda install -c pytorch magma-cuda121 # or the magma-cuda* that matches your CUDA version from https://anaconda.org/pytorch/repo
@ -221,7 +221,7 @@ make triton
```bash
# Add this package on intel x86 processor machines only
conda install intel::mkl-static intel::mkl-include
pip install mkl-static mkl-include
# Add these packages if torch.distributed is needed
conda install pkg-config libuv
```
@ -229,7 +229,7 @@ conda install pkg-config libuv
**On Windows**
```bash
conda install intel::mkl-static intel::mkl-include
pip install mkl-static mkl-include
# Add these packages if torch.distributed is needed.
# Distributed package support on Windows is a prototype feature and is subject to changes.
conda install -c conda-forge libuv=1.39

View File

@ -263,7 +263,24 @@ void Context::setLinalgPreferredBackend(at::LinalgBackend b) {
}
}
at::BlasBackend Context::blasPreferredBackend() const {
at::BlasBackend Context::blasPreferredBackend() {
#ifdef USE_ROCM
if (blas_preferred_backend == at::BlasBackend::Cublaslt) {
static const bool hipblaslt_unsupported = []() {
static const std::vector<std::string> archs = {"gfx90a", "gfx940", "gfx941", "gfx942"};
for (auto index = 0; index < at::getNumGPUs(); index++) {
if (!detail::getCUDAHooks().isGPUArch(index, archs)) {
TORCH_WARN_ONCE(
"Attempting to use hipBLASLt on an unsupported architecture! "
"Overriding blas backend to hipblas");
return true;
}
}
return false;
}();
if (hipblaslt_unsupported) blas_preferred_backend = at::BlasBackend::Cublas;
}
#endif
return blas_preferred_backend;
}
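One detail worth noting in the hunk above: the per-GPU architecture probe is wrapped in a function-local `static` initialized by an immediately-invoked lambda, so the potentially slow query runs at most once per process and later calls reuse the cached verdict. That caching is also why the method drops its `const` qualifier in the header change below: the probe may rewrite `blas_preferred_backend` on first use. A minimal standalone sketch of the probe-once pattern (illustrative names, not the PyTorch codebase):

```cpp
#include <iostream>

// Stand-in for an expensive device query (the real code walks every GPU
// via detail::getCUDAHooks().isGPUArch()).
bool all_gpus_supported() {
  std::cout << "probing devices...\n";  // printed exactly once below
  return false;
}

// C++11 "magic statics" guarantee the initializer of a function-local
// static runs exactly once, even under concurrent first calls; the lambda
// is invoked immediately and its result is cached for the process lifetime.
bool fall_back_to_default_backend() {
  static const bool unsupported = []() {
    return !all_gpus_supported();
  }();
  return unsupported;
}

int main() {
  std::cout << fall_back_to_default_backend() << '\n';  // probes, prints 1
  std::cout << fall_back_to_default_backend() << '\n';  // cached, prints 1
}
```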

View File

@ -217,7 +217,7 @@ class TORCH_API Context {
at::LinalgBackend linalgPreferredBackend() const;
void setLinalgPreferredBackend(at::LinalgBackend);
at::BlasBackend blasPreferredBackend() const;
at::BlasBackend blasPreferredBackend();
void setBlasPreferredBackend(at::BlasBackend);
// Note [Enabling Deterministic Operations]
@ -364,7 +364,7 @@ class TORCH_API Context {
bool enabled_flashSDP = true;
bool enabled_mem_efficientSDP = true;
bool enabled_mathSDP = true;
bool enabled_cudnnSDP = true;
bool enabled_cudnnSDP = false;
#ifdef USE_ROCM
bool benchmark_cudnn = true;
#else

View File

@ -17,7 +17,7 @@ static void metaFallback(
"while using an operator with PT2 compilation APIs (torch.compile/torch.export); "
"in order to use this operator with those APIs you'll need to add a fake impl. "
"Please see the following for next steps: "
"https://pytorch.org/docs/main/notes/custom_operators.html");
"https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html");
}
TORCH_LIBRARY_IMPL(_, Meta, m) {

View File

@ -554,6 +554,30 @@ Vectorized<ComplexDbl> inline minimum(
// return _mm256_or_ps(min, isnan);
}
template <>
Vectorized<ComplexDbl> C10_ALWAYS_INLINE operator+(const Vectorized<ComplexDbl>& a, const Vectorized<ComplexDbl>& b) {
return Vectorized<ComplexDbl>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexDbl> C10_ALWAYS_INLINE operator-(const Vectorized<ComplexDbl>& a, const Vectorized<ComplexDbl>& b) {
return Vectorized<ComplexDbl>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexDbl> C10_ALWAYS_INLINE operator&(const Vectorized<ComplexDbl>& a, const Vectorized<ComplexDbl>& b) {
return Vectorized<ComplexDbl>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexDbl> C10_ALWAYS_INLINE operator|(const Vectorized<ComplexDbl>& a, const Vectorized<ComplexDbl>& b) {
return Vectorized<ComplexDbl>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexDbl> C10_ALWAYS_INLINE operator^(const Vectorized<ComplexDbl>& a, const Vectorized<ComplexDbl>& b) {
return Vectorized<ComplexDbl>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec

View File

@ -55,6 +55,13 @@ class Vectorized<ComplexFlt> {
_vec1 = vfloat32{val3.real(), val3.imag(), val4.real(), val4.imag()};
}
C10_ALWAYS_INLINE const vec_internal_type& vec0() const {
return _vec0;
}
C10_ALWAYS_INLINE const vec_internal_type& vec1() const {
return _vec1;
}
template <uint64_t mask>
static std::enable_if_t<blendChoiceComplex(mask) == 0, Vectorized<ComplexFlt>>
C10_ALWAYS_INLINE
@ -623,6 +630,31 @@ Vectorized<ComplexFlt> inline minimum(
// return _mm256_or_ps(min, isnan);
}
template <>
Vectorized<ComplexFlt> C10_ALWAYS_INLINE operator+(const Vectorized<ComplexFlt>& a, const Vectorized<ComplexFlt>& b) {
return Vectorized<ComplexFlt>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexFlt> C10_ALWAYS_INLINE operator-(const Vectorized<ComplexFlt>& a, const Vectorized<ComplexFlt>& b) {
return Vectorized<ComplexFlt>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexFlt> C10_ALWAYS_INLINE operator&(const Vectorized<ComplexFlt>& a, const Vectorized<ComplexFlt>& b) {
return Vectorized<ComplexFlt>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexFlt> C10_ALWAYS_INLINE operator|(const Vectorized<ComplexFlt>& a, const Vectorized<ComplexFlt>& b) {
return Vectorized<ComplexFlt>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<ComplexFlt> C10_ALWAYS_INLINE operator^(const Vectorized<ComplexFlt>& a, const Vectorized<ComplexFlt>& b) {
return Vectorized<ComplexFlt>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec
} // namespace at

View File

@ -433,6 +433,42 @@ Vectorized<double> inline minimum(
const Vectorized<double>& b) {
return a.minimum(b);
}
template <>
Vectorized<double> C10_ALWAYS_INLINE operator+(const Vectorized<double>& a, const Vectorized<double>& b) {
return Vectorized<double>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<double> C10_ALWAYS_INLINE operator-(const Vectorized<double>& a, const Vectorized<double>& b) {
return Vectorized<double>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<double> C10_ALWAYS_INLINE operator*(const Vectorized<double>& a, const Vectorized<double>& b) {
return Vectorized<double>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
}
template <>
Vectorized<double> C10_ALWAYS_INLINE operator/(const Vectorized<double>& a, const Vectorized<double>& b) {
return Vectorized<double>{vec_div(a.vec0(), b.vec0()), vec_div(a.vec1(), b.vec1())};
}
template <>
Vectorized<double> C10_ALWAYS_INLINE operator&(const Vectorized<double>& a, const Vectorized<double>& b) {
return Vectorized<double>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<double> C10_ALWAYS_INLINE operator|(const Vectorized<double>& a, const Vectorized<double>& b) {
return Vectorized<double>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<double> C10_ALWAYS_INLINE operator^(const Vectorized<double>& a, const Vectorized<double>& b) {
return Vectorized<double>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec
} // namespace at
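These VSX hunks all follow one shape: `Vectorized<T>` carries two hardware registers (note the `vec0()`/`vec1()` const accessors added for `ComplexFlt` above), and each binary operator applies the matching intrinsic (`vec_add`, `vec_sub`, `vec_mul`, ...) to both halves. A portable sketch of that structure, with `std::array` standing in for the vector registers (illustrative only, not the PyTorch implementation):

```cpp
#include <array>
#include <cstddef>
#include <iostream>

// Two "registers" of N lanes each, mirroring the _vec0/_vec1 pair.
template <typename T, std::size_t N = 2>
struct Vec2x {
  std::array<T, N> lo;  // plays the role of _vec0
  std::array<T, N> hi;  // plays the role of _vec1
};

// Lane-wise addition over both halves; in the real code the loop body is a
// single vec_add(a.vec0(), b.vec0()) / vec_add(a.vec1(), b.vec1()) pair.
template <typename T, std::size_t N>
Vec2x<T, N> operator+(const Vec2x<T, N>& a, const Vec2x<T, N>& b) {
  Vec2x<T, N> r;
  for (std::size_t i = 0; i < N; ++i) {
    r.lo[i] = a.lo[i] + b.lo[i];
    r.hi[i] = a.hi[i] + b.hi[i];
  }
  return r;
}

int main() {
  Vec2x<double> a{{1.0, 2.0}, {3.0, 4.0}};
  Vec2x<double> b{{10.0, 20.0}, {30.0, 40.0}};
  const auto c = a + b;
  std::cout << c.lo[0] << ' ' << c.hi[1] << '\n';  // 11 44
}
```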

View File

@ -456,6 +456,41 @@ Vectorized<float> inline minimum(const Vectorized<float>& a, const Vectorized<fl
return a.minimum(b);
}
template <>
Vectorized<float> C10_ALWAYS_INLINE operator+(const Vectorized<float>& a, const Vectorized<float>& b) {
return Vectorized<float>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<float> C10_ALWAYS_INLINE operator-(const Vectorized<float>& a, const Vectorized<float>& b) {
return Vectorized<float>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<float> C10_ALWAYS_INLINE operator*(const Vectorized<float>& a, const Vectorized<float>& b) {
return Vectorized<float>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
}
template <>
Vectorized<float> C10_ALWAYS_INLINE operator/(const Vectorized<float>& a, const Vectorized<float>& b) {
return Vectorized<float>{vec_div(a.vec0(), b.vec0()), vec_div(a.vec1(), b.vec1())};
}
template <>
Vectorized<float> C10_ALWAYS_INLINE operator&(const Vectorized<float>& a, const Vectorized<float>& b) {
return Vectorized<float>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<float> C10_ALWAYS_INLINE operator|(const Vectorized<float>& a, const Vectorized<float>& b) {
return Vectorized<float>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<float> C10_ALWAYS_INLINE operator^(const Vectorized<float>& a, const Vectorized<float>& b) {
return Vectorized<float>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec
} // namespace at

View File

@ -362,6 +362,40 @@ Vectorized<int16_t> inline minimum(
return a.minimum(b);
}
template <>
Vectorized<int16_t> C10_ALWAYS_INLINE operator+(const Vectorized<int16_t>& a, const Vectorized<int16_t>& b) {
return Vectorized<int16_t>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<int16_t> C10_ALWAYS_INLINE operator-(const Vectorized<int16_t>& a, const Vectorized<int16_t>& b) {
return Vectorized<int16_t>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<int16_t> C10_ALWAYS_INLINE operator*(const Vectorized<int16_t>& a, const Vectorized<int16_t>& b) {
return Vectorized<int16_t>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
}
template <>
Vectorized<int16_t> C10_ALWAYS_INLINE operator/(const Vectorized<int16_t>& a, const Vectorized<int16_t>& b) {
return Vectorized<int16_t>{a.vec0()/b.vec0(), a.vec1()/b.vec1()};
}
template <>
Vectorized<int16_t> C10_ALWAYS_INLINE operator&(const Vectorized<int16_t>& a, const Vectorized<int16_t>& b) {
return Vectorized<int16_t>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<int16_t> C10_ALWAYS_INLINE operator|(const Vectorized<int16_t>& a, const Vectorized<int16_t>& b) {
return Vectorized<int16_t>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<int16_t> C10_ALWAYS_INLINE operator^(const Vectorized<int16_t>& a, const Vectorized<int16_t>& b) {
return Vectorized<int16_t>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec

View File

@ -293,6 +293,41 @@ Vectorized<int32_t> inline minimum(
return a.minimum(b);
}
template <>
Vectorized<int32_t> C10_ALWAYS_INLINE operator+(const Vectorized<int32_t>& a, const Vectorized<int32_t>& b) {
return Vectorized<int32_t>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<int32_t> C10_ALWAYS_INLINE operator-(const Vectorized<int32_t>& a, const Vectorized<int32_t>& b) {
return Vectorized<int32_t>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<int32_t> C10_ALWAYS_INLINE operator*(const Vectorized<int32_t>& a, const Vectorized<int32_t>& b) {
return Vectorized<int32_t>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
}
template <>
Vectorized<int32_t> C10_ALWAYS_INLINE operator/(const Vectorized<int32_t>& a, const Vectorized<int32_t>& b) {
return Vectorized<int32_t>{a.vec0()/b.vec0(), a.vec1()/b.vec1()};
}
template <>
Vectorized<int32_t> C10_ALWAYS_INLINE operator&(const Vectorized<int32_t>& a, const Vectorized<int32_t>& b) {
return Vectorized<int32_t>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<int32_t> C10_ALWAYS_INLINE operator|(const Vectorized<int32_t>& a, const Vectorized<int32_t>& b) {
return Vectorized<int32_t>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<int32_t> C10_ALWAYS_INLINE operator^(const Vectorized<int32_t>& a, const Vectorized<int32_t>& b) {
return Vectorized<int32_t>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec
} // namespace at

View File

@ -246,6 +246,41 @@ Vectorized<int64_t> inline minimum(
return a.minimum(b);
}
template <>
Vectorized<int64_t> C10_ALWAYS_INLINE operator+(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
return Vectorized<int64_t>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<int64_t> C10_ALWAYS_INLINE operator-(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
return Vectorized<int64_t>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<int64_t> C10_ALWAYS_INLINE operator*(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
return Vectorized<int64_t>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
}
template <>
Vectorized<int64_t> C10_ALWAYS_INLINE operator/(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
return Vectorized<int64_t>{vec_div(a.vec0(), b.vec0()), vec_div(a.vec1(), b.vec1())};
}
template <>
Vectorized<int64_t> C10_ALWAYS_INLINE operator&(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
return Vectorized<int64_t>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<int64_t> C10_ALWAYS_INLINE operator|(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
return Vectorized<int64_t>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<int64_t> C10_ALWAYS_INLINE operator^(const Vectorized<int64_t>& a, const Vectorized<int64_t>& b) {
return Vectorized<int64_t>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec
} // namespace at

View File

@ -240,6 +240,42 @@ Vectorized<c10::qint32> inline minimum(
const Vectorized<c10::qint32>& b) {
return a.minimum(b);
}
template <>
Vectorized<c10::qint32> C10_ALWAYS_INLINE operator+(const Vectorized<c10::qint32>& a, const Vectorized<c10::qint32>& b) {
return Vectorized<c10::qint32>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint32> C10_ALWAYS_INLINE operator-(const Vectorized<c10::qint32>& a, const Vectorized<c10::qint32>& b) {
return Vectorized<c10::qint32>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint32> C10_ALWAYS_INLINE operator*(const Vectorized<c10::qint32>& a, const Vectorized<c10::qint32>& b) {
return Vectorized<c10::qint32>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint32> C10_ALWAYS_INLINE operator/(const Vectorized<c10::qint32>& a, const Vectorized<c10::qint32>& b) {
return Vectorized<c10::qint32>{a.vec0()/b.vec0(), a.vec1()/b.vec1()};
}
template <>
Vectorized<c10::qint32> C10_ALWAYS_INLINE operator&(const Vectorized<c10::qint32>& a, const Vectorized<c10::qint32>& b) {
return Vectorized<c10::qint32>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint32> C10_ALWAYS_INLINE operator|(const Vectorized<c10::qint32>& a, const Vectorized<c10::qint32>& b) {
return Vectorized<c10::qint32>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint32> C10_ALWAYS_INLINE operator^(const Vectorized<c10::qint32>& a, const Vectorized<c10::qint32>& b) {
return Vectorized<c10::qint32>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec
} // namespace at

View File

@ -442,6 +442,42 @@ Vectorized<c10::qint8> inline minimum(
const Vectorized<c10::qint8>& b) {
return a.minimum(b);
}
template <>
Vectorized<c10::qint8> C10_ALWAYS_INLINE operator+(const Vectorized<c10::qint8>& a, const Vectorized<c10::qint8>& b) {
return Vectorized<c10::qint8>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint8> C10_ALWAYS_INLINE operator-(const Vectorized<c10::qint8>& a, const Vectorized<c10::qint8>& b) {
return Vectorized<c10::qint8>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint8> C10_ALWAYS_INLINE operator*(const Vectorized<c10::qint8>& a, const Vectorized<c10::qint8>& b) {
return Vectorized<c10::qint8>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint8> C10_ALWAYS_INLINE operator/(const Vectorized<c10::qint8>& a, const Vectorized<c10::qint8>& b) {
return Vectorized<c10::qint8>{a.vec0()/b.vec0(), a.vec1()/b.vec1()};
}
template <>
Vectorized<c10::qint8> C10_ALWAYS_INLINE operator&(const Vectorized<c10::qint8>& a, const Vectorized<c10::qint8>& b) {
return Vectorized<c10::qint8>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint8> C10_ALWAYS_INLINE operator|(const Vectorized<c10::qint8>& a, const Vectorized<c10::qint8>& b) {
return Vectorized<c10::qint8>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
}
template <>
Vectorized<c10::qint8> C10_ALWAYS_INLINE operator^(const Vectorized<c10::qint8>& a, const Vectorized<c10::qint8>& b) {
return Vectorized<c10::qint8>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
}
} // namespace
} // namespace vec
} // namespace at

View File

@@ -461,6 +461,41 @@ Vectorized<c10::quint8> inline minimum(
   return a.minimum(b);
 }
+template <>
+Vectorized<c10::quint8> C10_ALWAYS_INLINE operator+(const Vectorized<c10::quint8>& a, const Vectorized<c10::quint8>& b) {
+  return Vectorized<c10::quint8>{vec_add(a.vec0(), b.vec0()), vec_add(a.vec1(), b.vec1())};
+}
+template <>
+Vectorized<c10::quint8> C10_ALWAYS_INLINE operator-(const Vectorized<c10::quint8>& a, const Vectorized<c10::quint8>& b) {
+  return Vectorized<c10::quint8>{vec_sub(a.vec0(), b.vec0()), vec_sub(a.vec1(), b.vec1())};
+}
+template <>
+Vectorized<c10::quint8> C10_ALWAYS_INLINE operator*(const Vectorized<c10::quint8>& a, const Vectorized<c10::quint8>& b) {
+  return Vectorized<c10::quint8>{vec_mul(a.vec0(), b.vec0()), vec_mul(a.vec1(), b.vec1())};
+}
+template <>
+Vectorized<c10::quint8> C10_ALWAYS_INLINE operator/(const Vectorized<c10::quint8>& a, const Vectorized<c10::quint8>& b) {
+  return Vectorized<c10::quint8>{a.vec0()/b.vec0(), a.vec1()/b.vec1()};
+}
+template <>
+Vectorized<c10::quint8> C10_ALWAYS_INLINE operator&(const Vectorized<c10::quint8>& a, const Vectorized<c10::quint8>& b) {
+  return Vectorized<c10::quint8>{vec_and(a.vec0(), b.vec0()), vec_and(a.vec1(), b.vec1())};
+}
+template <>
+Vectorized<c10::quint8> C10_ALWAYS_INLINE operator|(const Vectorized<c10::quint8>& a, const Vectorized<c10::quint8>& b) {
+  return Vectorized<c10::quint8>{vec_or(a.vec0(), b.vec0()), vec_or(a.vec1(), b.vec1())};
+}
+template <>
+Vectorized<c10::quint8> C10_ALWAYS_INLINE operator^(const Vectorized<c10::quint8>& a, const Vectorized<c10::quint8>& b) {
+  return Vectorized<c10::quint8>{vec_xor(a.vec0(), b.vec0()), vec_xor(a.vec1(), b.vec1())};
+}
 } // namespace
 } // namespace vec
 } // namespace at


@@ -152,9 +152,6 @@ void CUDAGeneratorState::register_graph(cuda::CUDAGraph* graph) {
  * Unregisters a CUDA graph from the RNG state.
  */
 void CUDAGeneratorState::unregister_graph(cuda::CUDAGraph* graph) {
-  // Ensures that the RNG state is not currently being captured.
-  at::cuda::assertNotCapturing(
-      "Cannot unregister the state during capturing stage.");
   // Verify the graph was previously registered.
   TORCH_CHECK(
       registered_graphs_.find(graph) != registered_graphs_.end(),
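With the capture-stage assertion gone, `unregister_graph` only verifies prior registration before erasing the graph. A hypothetical sketch of the remaining bookkeeping (`Graph` stands in for `cuda::CUDAGraph`, and a thrown exception for `TORCH_CHECK`):

```
#include <stdexcept>
#include <unordered_set>

struct Graph {};  // hypothetical stand-in for cuda::CUDAGraph

struct GeneratorState {
  std::unordered_set<Graph*> registered_graphs;

  void unregister_graph(Graph* graph) {
    // the only precondition left: the graph must have been registered earlier
    if (registered_graphs.find(graph) == registered_graphs.end()) {
      throw std::runtime_error("graph was never registered with this RNG state");
    }
    registered_graphs.erase(graph);
  }
};
```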


@@ -440,6 +440,20 @@ int CUDAHooks::getNumGPUs() const {
   return at::cuda::device_count();
 }
+#ifdef USE_ROCM
+bool CUDAHooks::isGPUArch(DeviceIndex device_index, const std::vector<std::string>& archs) const {
+  hipDeviceProp_t* prop = at::cuda::getDeviceProperties(device_index);
+  std::string device_arch = prop->gcnArchName;
+  for (std::string arch : archs) {
+    size_t substring = device_arch.find(arch);
+    if (substring != std::string::npos) {
+      return true;
+    }
+  }
+  return false;
+}
+#endif
 void CUDAHooks::deviceSynchronize(DeviceIndex device_index) const {
   at::DeviceGuard device_guard(at::Device(at::DeviceType::CUDA, device_index));
   c10::cuda::device_synchronize();
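A hedged usage sketch: on a ROCm build, a caller could probe the device's `gcnArchName` through the hooks interface before enabling an arch-specific path. The architecture strings here are illustrative, and matching is by substring (e.g. "gfx90a" matches "gfx90a:sramecc+:xnack-"):

```
#ifdef USE_ROCM
#include <ATen/detail/CUDAHooksInterface.h>

void maybe_enable_fast_path() {
  // arch names are examples only; any substring of gcnArchName matches
  if (at::detail::getCUDAHooks().isGPUArch(/*device_index=*/0, {"gfx90a", "gfx942"})) {
    // take the MI200/MI300-specific code path
  }
}
#endif
```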


@@ -49,6 +49,9 @@ struct CUDAHooks : public at::CUDAHooksInterface {
   int64_t cuFFTGetPlanCacheSize(DeviceIndex device_index) const override;
   void cuFFTClearPlanCache(DeviceIndex device_index) const override;
   int getNumGPUs() const override;
+#ifdef USE_ROCM
+  bool isGPUArch(DeviceIndex device_index, const std::vector<std::string>& archs) const override;
+#endif
   void deviceSynchronize(DeviceIndex device_index) const override;
 };


@@ -84,11 +84,23 @@ struct GemmParams : OpParams {
     return c10::str(transa, transb, "_", m, "_", n, "_", k);
   }
+  size_t GetSizeA() const {
+    return sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
+  }
+  size_t GetSizeB() const {
+    return sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
+  }
+  size_t GetSizeC() const {
+    return sizeof(T) * ldc * n;
+  }
   size_t GetSize(bool duplicate_inputs) const {
-    size_t size = sizeof(T) * ldc * n;
+    size_t size = GetSizeC();
     if (duplicate_inputs) {
-      size += sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
-      size += sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
+      size += GetSizeA();
+      size += GetSizeB();
     }
     return size;
   }
@@ -98,13 +110,13 @@ struct GemmParams : OpParams {
     *copy = *this;
     c10::DeviceIndex device = 0;
     AT_CUDA_CHECK(c10::cuda::GetDevice(&device));
-    size_t c_size = ldc * n * sizeof(T);
+    size_t c_size = GetSizeC();
     copy->c = static_cast<T*>(c10::cuda::CUDACachingAllocator::raw_alloc(c_size));
     AT_CUDA_CHECK(c10::cuda::CUDACachingAllocator::memcpyAsync(
         copy->c, device, c, device, c_size, getCurrentCUDAStream(device), true));
     if (duplicate_inputs) {
-      size_t a_size = sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
-      size_t b_size = sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
+      size_t a_size = GetSizeA();
+      size_t b_size = GetSizeB();
       copy->a = static_cast<const T*>(c10::cuda::CUDACachingAllocator::raw_alloc(a_size));
       copy->b = static_cast<const T*>(c10::cuda::CUDACachingAllocator::raw_alloc(b_size));
       copy->duplicate_inputs_ = true;
@@ -153,11 +165,23 @@ struct GemmStridedBatchedParams : OpParams {
     return c10::str(transa, transb, "_", m, "_", n, "_", k, "_B_", batch);
   }
+  size_t GetSizeA() const {
+    return sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m) * batch;
+  }
+  size_t GetSizeB() const {
+    return sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k) * batch;
+  }
+  size_t GetSizeC() const {
+    return sizeof(T) * ldc * n * batch;
+  }
   size_t GetSize(bool duplicate_inputs) const {
-    size_t size = sizeof(T) * stride_c * batch;
+    size_t size = GetSizeC();
     if (duplicate_inputs) {
-      size += sizeof(T) * stride_a * batch;
-      size += sizeof(T) * stride_b * batch;
+      size += GetSizeA();
+      size += GetSizeB();
     }
     return size;
   }
@@ -167,13 +191,13 @@ struct GemmStridedBatchedParams : OpParams {
     *copy = *this;
     c10::DeviceIndex device = 0;
     AT_CUDA_CHECK(c10::cuda::GetDevice(&device));
-    size_t c_size = batch * stride_c * sizeof(T);
+    size_t c_size = GetSizeC();
     copy->c = static_cast<T*>(c10::cuda::CUDACachingAllocator::raw_alloc(c_size));
     AT_CUDA_CHECK(c10::cuda::CUDACachingAllocator::memcpyAsync(
         copy->c, device, c, device, c_size, getCurrentCUDAStream(device), true));
     if (duplicate_inputs) {
-      size_t a_size = sizeof(T) * stride_a * batch;
-      size_t b_size = sizeof(T) * stride_b * batch;
+      size_t a_size = GetSizeA();
+      size_t b_size = GetSizeB();
       copy->a = static_cast<const T*>(c10::cuda::CUDACachingAllocator::raw_alloc(a_size));
       copy->b = static_cast<const T*>(c10::cuda::CUDACachingAllocator::raw_alloc(b_size));
       copy->duplicate_inputs_ = true;
@@ -226,11 +250,23 @@ struct ScaledGemmParams : OpParams {
     return c10::str(transa, transb, "_", m, "_", n, "_", k);
   }
+  size_t GetSizeA() const {
+    return sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
+  }
+  size_t GetSizeB() const {
+    return sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
+  }
+  size_t GetSizeC() const {
+    return sizeof(T) * ldc * n;
+  }
   size_t GetSize(bool duplicate_inputs) const {
-    size_t size = sizeof(T) * ldc * n;
+    size_t size = GetSizeC();
     if (duplicate_inputs) {
-      size += sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
-      size += sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
+      size += GetSizeA();
+      size += GetSizeB();
     }
     return size;
   }
@@ -240,13 +276,13 @@ struct ScaledGemmParams : OpParams {
     *copy = *this;
     c10::DeviceIndex device = 0;
     AT_CUDA_CHECK(c10::cuda::GetDevice(&device));
-    size_t c_size = ldc * n * sizeof(T);
+    size_t c_size = GetSizeC();
     copy->c = c10::cuda::CUDACachingAllocator::raw_alloc(c_size);
     AT_CUDA_CHECK(c10::cuda::CUDACachingAllocator::memcpyAsync(
         copy->c, device, c, device, c_size, getCurrentCUDAStream(device), true));
     if (duplicate_inputs) {
-      size_t a_size = sizeof(T) * lda * ((transa == 'n' || transa == 'N') ? k : m);
-      size_t b_size = sizeof(T) * ldb * ((transb == 'n' || transb == 'N') ? n : k);
+      size_t a_size = GetSizeA();
+      size_t b_size = GetSizeB();
       copy->a = c10::cuda::CUDACachingAllocator::raw_alloc(a_size);
       copy->b = c10::cuda::CUDACachingAllocator::raw_alloc(b_size);
       copy->duplicate_inputs_ = true;
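The three `GetSize*` helpers factor out a sizing rule that was previously spelled out inline at every call site (and, for `GemmStridedBatchedParams`, switch input/output sizing from the `stride_*` fields to the leading-dimension formula). The rule itself is standard BLAS layout arithmetic; a self-contained sketch:

```
#include <cstddef>

// Sketch of the sizing rule the GetSizeA/B/C helpers encode: a column-major
// operand with leading dimension ld occupies ld * cols elements, where cols
// is k for a non-transposed m x k matrix and m for a transposed one.
template <typename T>
size_t gemm_operand_bytes(char trans, size_t ld, size_t m, size_t k) {
  const bool not_transposed = (trans == 'n' || trans == 'N');
  return sizeof(T) * ld * (not_transposed ? k : m);
}

int main() {
  // e.g. float A, 1024 x 512, padded to lda = 1056, not transposed:
  // 1056 * 512 * sizeof(float) bytes.
  return gemm_operand_bytes<float>('n', 1056, 1024, 512) == 1056u * 512u * 4u ? 0 : 1;
}
```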


@@ -375,9 +375,9 @@ void TuningContext::EnableNumericsCheck(bool value) {
 }
 bool TuningContext::IsNumericsCheckEnabled() const {
-  static const char *env = getenv("PYTORCH_TUNABLEOP_NUMERICAL_CHECK");
-  if (env != nullptr && strcmp(env, "0") == 0) {
-    return false;
+  const char *env = getenv("PYTORCH_TUNABLEOP_NUMERICAL_CHECK");
+  if (env != nullptr && strcmp(env, "1") == 0) {
+    return true;
   }
   return numerics_check_enable_;
 }
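This hunk inverts the environment variable's role: previously `PYTORCH_TUNABLEOP_NUMERICAL_CHECK=0` could only veto the check (and was read once, via `static`), whereas now `=1` force-enables it on every call and anything else defers to the programmatic flag. A sketch of the resulting precedence, with a parameter standing in for the `numerics_check_enable_` member:

```
#include <cstdlib>
#include <cstring>

// Revised precedence: the env var can only force-enable the check;
// any other value (or an unset variable) defers to the programmatic setting.
bool numerics_check_enabled(bool programmatic_setting) {
  const char* env = std::getenv("PYTORCH_TUNABLEOP_NUMERICAL_CHECK");
  if (env != nullptr && std::strcmp(env, "1") == 0) {
    return true;
  }
  return programmatic_setting;
}
```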


@@ -124,8 +124,11 @@ class TunableOp {
     std::string id_name = "Default";
     ParamsT* reference_params = nullptr;
+    // numeric check option is controlled by non-static env var, so check it once per tuned operator
+    bool do_numerics_check = ctx->IsNumericsCheckEnabled();
     // calculate a reference answer for numerical check
-    if (ctx->IsNumericsCheckEnabled()) {
+    if (do_numerics_check) {
       reference_params = params->DeepCopy(false);
       TORCH_CHECK(ops_[ResultEntry::Default()]->Call(reference_params) == OK);
     }
@@ -156,10 +159,11 @@ class TunableOp {
     for (size_t i = 0; i < op_names_.size(); i++) {
       auto* candidate = ops_[op_names_[i]].get(); // borrow pointer
-      if (ctx->IsNumericsCheckEnabled()) {
+      if (do_numerics_check) {
         ParamsT* numerical_params = params->DeepCopy(false);
         auto status = candidate->Call(numerical_params);
         if (status != OK) {
+          numerical_params->Delete();
           TUNABLE_LOG3("├──unsupported id=", i, ", ", op_sig, '(', params_sig, ") ", op_names_[i]);
           continue;
         }
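Two things change here: the env-var-backed setting is read once per tuning pass instead of once per candidate, and the deep-copied params are now freed before `continue` when a candidate is unsupported, closing a leak. A sketch of an RAII alternative that would make every early exit leak-free (`FakeParams` is a hypothetical stand-in for `ParamsT`; this is an illustration, not what the change above does):

```
#include <memory>

struct FakeParams {  // hypothetical stand-in for ParamsT
  FakeParams* DeepCopy(bool /*duplicate_inputs*/) const { return new FakeParams(*this); }
  void Delete() { delete this; }
};

struct ParamsDeleter {
  void operator()(FakeParams* p) const { p->Delete(); }
};

int main() {
  FakeParams params;
  // the copy is released on every path out of the scope, including `continue`
  std::unique_ptr<FakeParams, ParamsDeleter> guard(params.DeepCopy(false));
  return 0;
}
```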


@@ -186,6 +186,12 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
     return 0;
   }
+#ifdef USE_ROCM
+  virtual bool isGPUArch(DeviceIndex /*device_index*/, const std::vector<std::string>& /*archs*/) const {
+    TORCH_CHECK(false, "Cannot check GPU arch without ATen_cuda library. ", CUDA_HELP);
+  }
+#endif
   virtual void deviceSynchronize(DeviceIndex /*device_index*/) const {
     TORCH_CHECK(false, "Cannot synchronize CUDA device without ATen_cuda library. ", CUDA_HELP);
   }


@@ -351,23 +351,29 @@ void cpu_flash_attention(
             tmp_max);
       }
       tmp_max = qk_max_data[row] > tmp_max ? qk_max_data[row] : tmp_max;
-      // qk <- exp(qk - max) and sum per row
-      tmp_sum = tmp_max;
-      _exp_reduce_sum_fusion_kernel(
-          qk_data + row * kvBlockSize, kvBlockSize,
-          conditional_data_ptr(qk_data, qk_reduced_data) + row * kvBlockSize,
-          tmp_sum);
-      // exp_tmp <- exp(max[row] - max)
-      exp_tmp = std::exp(qk_max_data[row] - tmp_max);
-      // sum[row] <- sum + exp_tmp * sum[row]
-      qk_sum_data[row] = tmp_sum + exp_tmp * qk_sum_data[row];
-      // max[row] <- max
-      qk_max_data[row] = tmp_max;
-      // dst <- dst * exp_tmp
-      if (n > 0) {
-        vec::map<accum_t>(
-            [exp_tmp](Vec x) { return x * Vec(exp_tmp); },
-            dst_data + row * headSize, dst_data + row * headSize, headSize);
-      }
+      if (tmp_max == -std::numeric_limits<accum_t>::infinity()) {
+        // to avoid `nan = exp2f(-inf - (-inf))`
+        fill_stub(conditional_data_ptr(qk_data, qk_reduced_data) + row * kvBlockSize,
+            static_cast<scalar_t>(0), kvBlockSize);
+      } else {
+        tmp_sum = tmp_max;
+        // qk <- exp(qk - max) and sum per row
+        _exp_reduce_sum_fusion_kernel(
+            qk_data + row * kvBlockSize, kvBlockSize,
+            conditional_data_ptr(qk_data, qk_reduced_data) + row * kvBlockSize,
+            tmp_sum);
+        // exp_tmp <- exp(max[row] - max)
+        exp_tmp = std::exp(qk_max_data[row] - tmp_max);
+        // sum[row] <- sum + exp_tmp * sum[row]
+        qk_sum_data[row] = tmp_sum + exp_tmp * qk_sum_data[row];
+        // max[row] <- max
+        qk_max_data[row] = tmp_max;
+        // dst <- dst * exp_tmp
+        if (n > 0) {
+          vec::map<accum_t>(
+              [exp_tmp](Vec x) { return x * Vec(exp_tmp); },
+              dst_data + row * headSize, dst_data + row * headSize, headSize);
+        }
+      }
     }
     // Calculate Softmax(q @ k.T) @ v
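The new guard handles the fully-masked case of the streaming (online) softmax: when every logit in the current key block is masked, the row max stays at negative infinity, and `exp(qk - max)` would evaluate `exp(-inf - (-inf)) = NaN`, so the block is written as zeros instead. A scalar sketch of the guarded update (one row, no vectorization, rescaling of the dst accumulator omitted):

```
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

// Streaming-softmax step over one key block, with the fully-masked guard.
void update_row(std::vector<float>& qk_block, float& row_max, float& row_sum) {
  float blk_max = -std::numeric_limits<float>::infinity();
  for (float v : qk_block) blk_max = std::max(blk_max, v);
  float new_max = std::max(row_max, blk_max);
  if (new_max == -std::numeric_limits<float>::infinity()) {
    // everything masked so far: exp(-inf - (-inf)) would be NaN
    std::fill(qk_block.begin(), qk_block.end(), 0.0f);
    return;
  }
  float blk_sum = 0.0f;
  for (float& v : qk_block) { v = std::exp(v - new_max); blk_sum += v; }
  float scale = std::exp(row_max - new_max);  // exp(-inf) == 0 on the first block
  row_sum = blk_sum + scale * row_sum;
  row_max = new_max;
}
```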


@@ -664,7 +664,7 @@ REGISTER_CUDA_DISPATCH(index_put_with_sort_quantized_stub, &index_put_with_sort_
 // Check tensor dimensions for index operations, and return the slice size.
-static ptrdiff_t getSliceSize(const Tensor & dst,
+static size_t getSliceSize(const Tensor & dst,
                       int dim,
                       const Tensor & index,
                       const Tensor & src)
@@ -674,7 +674,7 @@ static ptrdiff_t getSliceSize(const Tensor & dst,
   TORCH_CHECK(index.dim() <= 1, "Index must be vector or scalar");
-  ptrdiff_t dstSliceSize = 1;
+  size_t dstSliceSize = 1;
   TORCH_CHECK(dim >= 0 && dim < dstDims, "Indexing dim ", dim, " is out of bounds");
   for (const auto d: c10::irange(dstDims)) {
     if (d != dim) {
@@ -686,7 +686,7 @@ static ptrdiff_t getSliceSize(const Tensor & dst,
   TORCH_CHECK(index.numel() == src.size(dim),
              "length of src.size[dim] is not equal to length of indices");
-  ptrdiff_t srcSliceSize = 1;
+  size_t srcSliceSize = 1;
   bool mismatch = false;
   if (dstDims != srcDims) mismatch = true;
@@ -876,11 +876,11 @@ void index_add_cuda_impl(const Tensor& self, int64_t dim, const Tensor& index, c
   // total size of the tensor ignoring dimension `dim`;
   // -the number of index we are choosing, which is the total size
   // of the tensor `index`.
-  const ptrdiff_t sliceSize = getSliceSize(self_, dim, index, source_);
-  const ptrdiff_t sourceTotalSize = source.numel();
-  const int64_t selfAddDimSize = self_.size(dim);
-  const ptrdiff_t numIndex = index.numel();
-  const int64_t selfNumel = self_.numel();
+  const uint64_t sliceSize = getSliceSize(self_, dim, index, source_);
+  const uint64_t sourceTotalSize = source.numel();
+  const uint64_t selfAddDimSize = self_.size(dim);
+  const uint64_t numIndex = index.numel();
+  const uint64_t selfNumel = self_.numel();
   if (sliceSize == 0) {
     return;
@@ -909,11 +909,11 @@ void index_add_cuda_impl(const Tensor& self, int64_t dim, const Tensor& index, c
       selfAddDimSize, selfNumel, reduce_add, alpha_value); \
   C10_CUDA_KERNEL_LAUNCH_CHECK();
-  const dim3 smallIndexGrid(std::min(ceil_div(sliceSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
-  const dim3 smallIndexBlock(std::min(sliceSize, (ptrdiff_t)128));
+  const dim3 smallIndexGrid(std::min(ceil_div(sliceSize, (uint64_t)128), (uint64_t)(mpc * 8)));
+  const dim3 smallIndexBlock(std::min(sliceSize, (uint64_t)128));
-  const dim3 largeIndexGrid(std::min(ceil_div(sourceTotalSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
-  const dim3 largeIndexBlock(std::min(sourceTotalSize, (ptrdiff_t)128));
+  const dim3 largeIndexGrid(std::min(ceil_div(sourceTotalSize, (uint64_t)128), (uint64_t)(mpc * 8)));
+  const dim3 largeIndexBlock(std::min(sourceTotalSize, (uint64_t)128));
   if (cuda::detail::canUse32BitIndexMath(result) &&
       cuda::detail::canUse32BitIndexMath(source) &&
@@ -1049,11 +1049,11 @@ void index_reduce_func_cuda_impl(
   // total size of the tensor ignoring dimension `dim`;
   // -the number of index we are choosing, which is the total size
   // of the tensor `index`.
-  ptrdiff_t sliceSize = getSliceSize(self_, dim, index, source_);
-  ptrdiff_t sourceTotalSize = source.numel();
-  int64_t selfReduceDimSize = self_.size(dim);
-  ptrdiff_t numIndex = index.numel();
-  int64_t selfNumel = self_.numel();
+  uint64_t sliceSize = getSliceSize(self_, dim, index, source_);
+  uint64_t sourceTotalSize = source.numel();
+  uint64_t selfReduceDimSize = self_.size(dim);
+  uint64_t numIndex = index.numel();
+  uint64_t selfNumel = self_.numel();
   if (sliceSize == 0) {
     return;
@@ -1082,11 +1082,11 @@ void index_reduce_func_cuda_impl(
       selfReduceDimSize, selfNumel, reduce_func, alpha_value); \
   C10_CUDA_KERNEL_LAUNCH_CHECK();
-  dim3 smallIndexGrid(std::min(ceil_div(sliceSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
-  dim3 smallIndexBlock(std::min(sliceSize, (ptrdiff_t)128));
+  dim3 smallIndexGrid(std::min(ceil_div(sliceSize, (uint64_t)128), (uint64_t)(mpc * 8)));
+  dim3 smallIndexBlock(std::min(sliceSize, (uint64_t)128));
-  dim3 largeIndexGrid(std::min(ceil_div(sourceTotalSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
-  dim3 largeIndexBlock(std::min(sourceTotalSize, (ptrdiff_t)128));
+  dim3 largeIndexGrid(std::min(ceil_div(sourceTotalSize, (uint64_t)128), (uint64_t)(mpc * 8)));
+  dim3 largeIndexBlock(std::min(sourceTotalSize, (uint64_t)128));
   if (cuda::detail::canUse32BitIndexMath(result) &&
       cuda::detail::canUse32BitIndexMath(source) &&
@@ -1318,8 +1318,8 @@ void index_select_out_cuda_impl(
     const Tensor& self,
     long dim,
     const Tensor& index) {
-  ptrdiff_t numIndices = index.numel();
-  int selfDims = self.dim() == 0 ? 1 : self.dim();
+  uint64_t numIndices = index.numel();
+  uint64_t selfDims = self.dim() == 0 ? 1 : self.dim();
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
@@ -1340,7 +1340,7 @@ void index_select_out_cuda_impl(
     at::native::resize_output(out, newSize);
   }
-  ptrdiff_t outTotalSize = out.numel();
+  uint64_t outTotalSize = out.numel();
   if (outTotalSize == 0) {
     return;
   }
@@ -1352,8 +1352,8 @@ void index_select_out_cuda_impl(
   // total size of the tensor ignoring dimension `dim`;
   // -the number of indices we are choosing, which is the total size
   // of the tensor `indices`.
-  int64_t selfSelectDimSize = self.dim() == 0 ? 1 : self.size(dim);
-  ptrdiff_t sliceSize = outTotalSize / numIndices;
+  uint64_t selfSelectDimSize = self.dim() == 0 ? 1 : self.size(dim);
+  uint64_t sliceSize = outTotalSize / numIndices;
   int mpc = at::cuda::getCurrentDeviceProperties()->multiProcessorCount;
@@ -1376,11 +1376,14 @@ void index_select_out_cuda_impl(
       selfSelectDimSize); \
   C10_CUDA_KERNEL_LAUNCH_CHECK();
-  dim3 smallIndexGrid(std::min(ceil_div(sliceSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
-  dim3 smallIndexBlock(std::min(sliceSize, (ptrdiff_t)128));
+  dim3 smallIndexGrid(std::min(ceil_div(sliceSize, (uint64_t)128), (uint64_t)(mpc * 8)));
+  dim3 smallIndexBlock(std::min(sliceSize, (uint64_t)128));
-  dim3 largeIndexGrid(std::min(ceil_div(outTotalSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
-  dim3 largeIndexBlock(std::min(outTotalSize, (ptrdiff_t)128));
+  dim3 largeIndexGrid(std::min(ceil_div(outTotalSize, (uint64_t)128), (uint64_t)(mpc * 8)));
+  // for issue https://github.com/pytorch/pytorch/issues/130806 there are two problems
+  // 1: ptrdiff_t was used but it is signed int, outTotalSize of 2147483648 can cause overflow
+  // 2: On ROCm, std::min -> ::min did not work as expected when outTotalSize >= 2147483648
+  dim3 largeIndexBlock( (outTotalSize < 128) ? outTotalSize : 128 );
   if (cuda::detail::canUse32BitIndexMath(out) &&
       cuda::detail::canUse32BitIndexMath(self) &&
       cuda::detail::canUse32BitIndexMath(index)) {
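The final hunk replaces `std::min` over signed `ptrdiff_t` with a plain ternary over `uint64_t`: per the in-diff comment, with 2^31 or more output elements the signed intermediate could overflow, and on ROCm `std::min` resolved to `::min` and misbehaved for such sizes. A sketch of the clamp in isolation:

```
#include <cstdint>

// Block size = min(outTotalSize, 128) in unsigned 64-bit arithmetic, written
// as a ternary so no std::min overload resolution is involved.
uint32_t pick_block_size(uint64_t out_total_size) {
  return static_cast<uint32_t>(out_total_size < 128 ? out_total_size : 128);
}

int main() {
  const uint64_t big = uint64_t(1) << 31;  // 2147483648 elements
  return pick_block_size(big) == 128 ? 0 : 1;
}
```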

Some files were not shown because too many files have changed in this diff.