Compare commits


110 Commits

Author SHA1 Message Date
617c4fe52c Fix invalid read in masked softmax (#82272) (#82272) (#82405)
Summary:
Per title; unfortunately, testing invalid reads with the caching allocator is hard.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/82272
Approved by: https://github.com/cpuhrsch

Test Plan:
contbuild & OSS CI, see 24d702d38e

Original Phabricator Test Plan:
Imported from GitHub, without a `Test Plan:` line.

Reviewed By: ajtulloch, osalpekar, cpuhrsch

Differential Revision: D38183160

Pulled By: ngimel

fbshipit-source-id: 0ea59868d4829bc540c1277a93daa029519d05b4

Co-authored-by: Natalia Gimelshein (Meta Employee) <ngimel@fb.com>
2022-07-28 13:08:39 -04:00
f469bc1fe1 [ci] Release only change: bump macos worker instance type (#82113)
* [ci] Release only change: bump macos worker instance type

* Applying bump for nightly

* Add macos-12-xl to actionlint
2022-07-25 18:22:51 +01:00
66f6e793f7 Fix deserialization of TransformerEncoderLayer (#81832) (#81832) (#82094)
Summary:
When `activation` is a module, it is not saved directly in the state dictionary but instead in `_modules`. When deserialized, the old version of this code would think that activation was missing and set it to ReLU. This version first reconstructs the module and then only sets activation to ReLU if it is neither a module nor a function.
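A minimal sketch of the shape of that fix (a hypothetical reduction, not the actual source; the real code lives in `TransformerEncoderLayer.__setstate__`):

```python
import torch.nn.functional as F
from torch import nn

class LayerSketch(nn.Module):
    # Hypothetical reduction of the fix: let nn.Module restore state
    # (including _modules) first, then only default `activation` to ReLU
    # when it is neither a module nor a function.
    def __setstate__(self, state):
        super().__setstate__(state)
        activation = getattr(self, "activation", None)
        if not isinstance(activation, nn.Module) and not callable(activation):
            self.activation = F.relu
```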

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81832
Approved by: https://github.com/kit1980, https://github.com/zrphercule

Test Plan:
contbuild & OSS CI, see e68583b4d1

Test plan from GitHub:
pytorch oss tests

Reviewed By: jeanschmidt, zrphercule

Differential Revision: D38014872

Pulled By: zdevito

fbshipit-source-id: 938079d768f7981ca55eed3c8828b29a92e06f41

Co-authored-by: Zachary DeVito (Meta Employee) <zdevito@fb.com>
2022-07-25 10:36:39 +01:00
35eb488428 [CI] Disable ios-12-5-1-x86-64 (#81612) (#81612) (#82096)
Summary:
Currently broken; moreover, for a while it was not testing trunk but rather some old released build, see https://github.com/pytorch/pytorch/runs/7369514831?check_suite_focus=true#step:9:147

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81612
Approved by: https://github.com/kit1980

Test Plan: contbuild & OSS CI, see 446833d11f

Reviewed By: DanilBaibak

Differential Revision: D37919692

Pulled By: malfet

fbshipit-source-id: c4fb3e32ffd2ca4d9004a4ab14d651cface00c26

Co-authored-by: Nikita Shulga (Meta Employee) <nshulga@fb.com>
2022-07-25 10:35:59 +01:00
e65e4ac1f1 1.12.1/bt fix (#81952)
* Add test for torchscripting nn.TransformerEncoder, including fast path (#79796) (#79796)

Summary:
Add a test just to check whether TransformerEncoder crashes when enumerating over the params [with_no_grad, use_torchscript, training].

The motivation was that the TransformerEncoder fast path (so with_no_grad=True) combined with use_torchscript=True would crash because NestedTensor doesn't have a size. This happened because the TransformerEncoder fast path automatically generates a NestedTensor as a perf optimization, and torchscript attempts to find intermediate tensor sizes while it optimizes. But NestedTensor has not implemented a size method, so things fail.

This test goes together with this fix https://github.com/pytorch/pytorch/pull/79480
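A hedged sketch of the test's shape (illustrative names and sizes, not the actual code in `test_transformers.py`):

```python
import contextlib
import itertools

import torch
from torch import nn

def check_transformer_encoder_combinations():
    src = torch.rand(2, 4, 8)  # (batch, seq, feature)
    for with_no_grad, use_torchscript, training in itertools.product(
        [True, False], repeat=3
    ):
        layer = nn.TransformerEncoderLayer(d_model=8, nhead=2, batch_first=True)
        model = nn.TransformerEncoder(layer, num_layers=2)
        model.train(training)
        if use_torchscript:
            model = torch.jit.script(model)
        ctx = torch.no_grad() if with_no_grad else contextlib.nullcontext()
        with ctx:
            model(src)  # must not crash for any combination
```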

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79796
Approved by: https://github.com/zrphercule

Test Plan:
contbuild & OSS CI, see 06274d7a48

Test plan from GitHub:
```
buck build --show-output mode/opt -c fbcode.enable_gpu_sections=true -c fbcode.nvcc_arch=a100 mode/inplace  //caffe2/test:transformers

./fbcode/buck-out/gen/caffe2/test/transformers#binary.par
```
Test runs and passes together with the changes from the PR above (I made another diff on top of this with those changes). Does not pass without the fix.

Reviewed By: mikekgfb

Differential Revision: D37222923

Pulled By: erichan1

fbshipit-source-id: 5a16e7d240cb51c0a613d16a79931d41122aba8b

* disable src mask for transformer and multiheadattention fastpath (#81277) (#81277)

Summary:
Disable fastpath if src_mask passed to TransformerEncoderLayer and MultiheadAttention.
- Refactored test_transformerencoder from test_nn.py to test_transformers.py. Added a src_mask test there.
- Added a specific src_mask test in test_transformers.py

Fixes https://github.com/pytorch/pytorch/issues/81129
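For context, a minimal sketch of the behavior being locked in (sizes are illustrative): passing `src_mask` must now fall back to the slow path instead of taking the fused one.

```python
import torch
from torch import nn

layer = nn.TransformerEncoderLayer(d_model=8, nhead=2, batch_first=True)
layer.eval()
src = torch.rand(2, 4, 8)
src_mask = torch.zeros(4, 4, dtype=torch.bool)  # attention mask over sequence positions

with torch.no_grad():
    out = layer(src, src_mask=src_mask)  # must not take the fused fastpath
```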

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81277
Approved by: https://github.com/zrphercule

Test Plan: contbuild & OSS CI, see 23088fcfdf

Reviewed By: DanilBaibak

Differential Revision: D37919513

Pulled By: erichan1

fbshipit-source-id: 0697d789634775136897fdb6a310356a6a45030d

* remove decoder tests for feature not in 1.12

* remove unnecessary changes from #77903 to make changes more minimal
2022-07-25 08:54:24 +01:00
e8534b92c9 MPS cherry picks for 1.12.1 (#81976)
* MPS: Fixes (#78930)

Cast integer to float in UnaryOps
Add tensor dtype in key generation
Enable FP16 scalars and use placeholder for alpha tensor in add/sum ops

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78930
Approved by: https://github.com/albanD

* MPS: Binary cast fix by proper type promotion and remove spurious copy warning (#79185)

Fixes #78019, #78020
Fixes https://github.com/pytorch/pytorch/pull/79185
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79185
Approved by: https://github.com/albanD, https://github.com/razarmehr

* MPS: add exponential op (#79188)

Add exponential distribution

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79188
Approved by: https://github.com/razarmehr, https://github.com/albanD

* [MPS] Delete unused vars from OperationUtils.mm

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79514

Approved by: https://github.com/kulinseth, https://github.com/albanD

* [MPS] Fix getDefaultGenerator and copy_kernel_mps

Returning reference to stack memory is really bad

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79515

Approved by: https://github.com/albanD

* [MPS][BE]Do not use `new/delete[]` in `chainViewOperation`

`std::array` will do just fine

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79516

Approved by: https://github.com/albanD

* [MPS] Support stride of stride

Fixes https://github.com/pytorch/pytorch/issues/79181

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79521

Approved by: https://github.com/kulinseth

* MPS: TopK raise an error if K>16 (#79677)

* Error out in TopK when k>16.
* Add a test case too.

Fixes #78915

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79677
Approved by: https://github.com/albanD

* [MPS]: Add fix for squeezed input axes handling in BCE loss (#79676)

Fixes #79527

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79676
Approved by: https://github.com/razarmehr, https://github.com/albanD

* MPS: Add amax and amin Ops with tests  (#79682)

* Add amax and amin with tests

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79682
Approved by: https://github.com/albanD

* [MPS] Fix torch.uint8 support (#80049)

`ScalarType.Byte` should be cast to `MPSDataTypeUInt8`.
Also add support for `torch.int8`, and test those conversions in `TestMPS.test_to`.

Fixes #80006

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80049
Approved by: https://github.com/albanD

* [MPS] Fix binary ops between int32 tensor with int64 scalar (#80220)

For some reason, tensor *op* scalar does not follow the normal binary promotion rules, so cast the output tensor to the expected type if needed.
It seems one should instead have cast the input tensors to the expected output tensor type, but that does not really work for boolean binary ops, so...
Add the output tensor type/shape to the cached graph key.
Extend `TestMPS.test_add_scalars` to test for this regression.

Fixes #79835
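A hedged sketch of the regression in the spirit of `TestMPS.test_add_scalars` (requires an MPS device; values are illustrative):

```python
import torch

x = torch.arange(4, dtype=torch.int32, device="mps")
y = x + 1  # Python int scalar: result must stay int32, with correct values
assert y.dtype == torch.int32
assert y.cpu().tolist() == [1, 2, 3, 4]
```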

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80220
Approved by: https://github.com/albanD

* [MPS] Add equal operator (#80195)

Which is, in essence, a composite of `eq`->`all`->`item`.
`native/mps/operators/Equal.cpp` is an almost verbatim copy of `native/cuda/Equal.cpp`
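In eager Python terms, the composite is roughly (a sketch, not the native implementation):

```python
import torch

def equal_sketch(a: torch.Tensor, b: torch.Tensor) -> bool:
    # mirror the native shortcut checks, then eq -> all -> item
    if a.shape != b.shape or a.dtype != b.dtype:
        return False
    return a.eq(b).all().item()
```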

Fix codegen by generating MPSFunctions headers

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80195
Approved by: https://github.com/albanD

* [MPS] add `aten::normal.Tensor_float` `aten::normal.float_Tensor` `aten::normal.Tensor_Tensor` (#80297)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80297
Approved by: https://github.com/albanD, https://github.com/kulinseth

* [MPS] Add flip (#80214)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80214
Approved by: https://github.com/DenisVieriu97, https://github.com/albanD

* [MPS] Add logical ops (#80216)

This PR adds `logical_not`, `logical_and`, `logical_or`, `logical_xor`.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/80216
Approved by: https://github.com/albanD, https://github.com/kulinseth

* [MPS] Add glu (#79866)

Adds mps op for `aten::glu.out`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79866
Approved by: https://github.com/kulinseth, https://github.com/albanD

* [MPS] Fix std/var cache issue (#80502)

Use `getTensorsStringKey`, which has the tensor shape info added as part of the key, to prevent cache-lookup issues when the shape of the input tensor changes.

Fixes #80499

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80502
Approved by: https://github.com/malfet, https://github.com/kulinseth

* Add scatter support for view operations (#79939)

* Add scatter support for view operations; #78074, #78886, #79672
* Update test_slicing_replace_column to properly test different sizes
* Handle in-place changes for binary ops; add new testcase
* Add new view ops testing scatter; add MPSDebugConfig.h config file for debugging purposes
* Merge gatherViewTensor and scatterViewTensor into a generic function
* Add scatter on demand in scatterViewOperation instead of caching it into a generic graph
* Create separate graphs for scatter and gather;
* Create scatter graph at scatter time

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79939
Approved by: https://github.com/razarmehr

* MPS: Fix handling of 1D tensors in linear backward (#80759)

Fixes https://github.com/pytorch/pytorch/issues/79784

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80759
Approved by: https://github.com/ezyang

* [MPS] Move the View ops to a separate file and reduce the number of graphs created (#80491)

This is dependent on the PR to go in first: https://github.com/pytorch/pytorch/pull/79939

Remove the data_ptr from the View Graph key which reduces the number of
graphs created significantly.

Don't wait when copying from MPS to MPS tensors

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80491
Approved by: https://github.com/malfet

* [MPS] Add softplus backward (#79873)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79873
Approved by: https://github.com/malfet

* [MPS] Add argmin (#80828)

This PR

1. adds argmin
2. refactors `reduction_type` in `ReduceOps.mm` with enum.

Co-authored-by: Kulin Seth <kulinseth@gmail.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/80828
Approved by: https://github.com/malfet

* [MPS] Fix LSTM batch_first output transposed (#80597)

The output of LSTM with `batch_first` should be transposed back to batch first format.

Fixes #80306
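A hedged repro sketch of the expectation (requires an MPS device; sizes are illustrative):

```python
import torch
from torch import nn

lstm = nn.LSTM(input_size=8, hidden_size=16, batch_first=True).to("mps")
x = torch.rand(2, 5, 8, device="mps")  # (batch, seq, feature)
out, _ = lstm(x)
assert out.shape == (2, 5, 16)  # output must come back in batch-first layout
```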

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80597
Approved by: https://github.com/kulinseth

* [MPS][BE] Introduce MPSUnaryCachedGraph (#81033)

I.e. a CachedGraph that has input and output tensors.
Also, add the `MPSGraphCache::LookUpAs` template, which combines `LookUp` with a
`static_cast` to the target type.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81033
Approved by: https://github.com/kulinseth

* [MPS] Add test consistency from OpInfo based tests from PR 78504 (#79532)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79532
Approved by: https://github.com/albanD, https://github.com/malfet

* [MPS] Add huber loss (#80163)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80163
Approved by: https://github.com/kulinseth, https://github.com/malfet

* Remove two tests dependent on the MPS serialization checkin.

* Fix lint error (FLAKE8) F401

* Remove the serialization test from test_mps as its support is not there in 1.12.1.

Co-authored-by: Kulin Seth <kulinseth@gmail.com>
Co-authored-by: Nikita Shulga <nikita.shulga@gmail.com>
Co-authored-by: Kulin Seth <kulin_seth@apple.com>
Co-authored-by: Abhishek Pathak <abhipathak97@gmail.com>
Co-authored-by: Nikita Shulga <nshulga@fb.com>
Co-authored-by: qqaatw <qqaatw@gmail.com>
Co-authored-by: Ramin Azarmehr <razarmehr@apple.com>
2022-07-25 08:52:34 +01:00
03b82bdd99 Disable XLA builds (#80099) (#80099) (#81977)
Summary:
As they are constantly failing to download the LLVM release from https://storage.googleapis.com:
```
Error in download_and_extract: java.io.IOException: Error downloading [9c6a2f2966.tar.gz, 9c6a2f2966.tar.gz] to /home/jenkins/.cache/bazel/_bazel_jenkins/b463291cb8b07b4bfde1e3a43733cd1a/external/llvm-raw/temp10926951092717297163/9c6a2f29660b886044a267bb4de662cd801079bc.tar.gz: Read timed out
Loading: 0 packages loaded
```

GitHub CC:
JackCaoG

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80099
Approved by: https://github.com/janeyx99

Test Plan: contbuild & OSS CI, see afdd83efcb

Reviewed By: atalman

Differential Revision: D37381940

Pulled By: malfet

fbshipit-source-id: 90e5e1a1dfed8dc19a6dddcbf5a4b2097755a25f

Co-authored-by: Nikita Shulga (Meta Employee) <nshulga@fb.com>
2022-07-22 13:50:34 +01:00
48947f738c Add check for cuda lazy init (#80912) (#80912) (#81970)
Summary:
Validate that no CUDA calls are made during the `import torch` call by
importing torch with visible devices limited to a non-existent device.

Should prevent regressions like ones reported in https://github.com/pytorch/pytorch/issues/80876
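A sketch of the idea behind the check (illustrative; the real test lives in PyTorch's test suite):

```python
import os
import subprocess
import sys

# Import torch in a subprocess with only a non-existent CUDA device visible;
# an eager CUDA call during import would surface as an error.
env = {**os.environ, "CUDA_VISIBLE_DEVICES": "32"}
proc = subprocess.run(
    [sys.executable, "-c", "import torch; print(torch.cuda.device_count())"],
    env=env,
    capture_output=True,
    text=True,
)
assert proc.returncode == 0, proc.stderr
```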

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80912
Approved by: https://github.com/ngimel, https://github.com/atalman

Test Plan: contbuild & OSS CI, see 1ad7ef3f21

Reviewed By: mehtanirav

Differential Revision: D37648899

Pulled By: malfet

fbshipit-source-id: a2947960d3d0d0e7e4775c37590b2e9fee38c4e9

Co-authored-by: Nikita Shulga (Meta Employee) <nshulga@fb.com>
2022-07-22 11:25:33 +01:00
787b469b19 Raise proper timeout when sharing the distributed shared seed (#81666) (#81666) (#81892)
Summary:
Fixes https://github.com/pytorch/data/issues/659

- This fixes the problem where a slow DataLoader on rank 0 would cause a TimeoutError, since I have removed the `wait` operation on the other ranks.
- This PR also adds a [default timeout](f6a45f7984/torch/csrc/distributed/c10d/ProcessGroup.hpp (L26-L27)) of 30 * 60 seconds (taking reference from the distributed team's implementation). When the distributed seed is stuck on any rank, a proper timeout with a detailed message will be raised.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81666
Approved by: https://github.com/NivekT

Test Plan: contbuild & OSS CI, see aa1466d542

Reviewed By: jeanschmidt

Differential Revision: D37990752

Pulled By: ejguan

fbshipit-source-id: 41639341aa737ab64de1992db5ed43cbb110ec91

Co-authored-by: erjia (Meta Employee) <erjia@fb.com>
2022-07-22 08:52:28 +01:00
37b49cf958 Cudnn conv cache key patch (#81418) (#81418) (#81888)
Summary:
Fixes #81106

Patches the cudnn algo cache to consider the right memory_format used in the descriptors, instead of blindly copying the memory_format of the inputs.
Note that, to be on the safe side, we could actually cache on all tensor strides instead. But given how we short-cut and align memory_format from the pytorch tensor to the cudnn descriptor, it suffices to have a single field in the cache.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81418
Approved by: https://github.com/ngimel

Test Plan: contbuild & OSS CI, see ce2ce3ae96

Reviewed By: DanilBaibak

Differential Revision: D37847747

Pulled By: DanilBaibak

fbshipit-source-id: 1e5583e29f911d0987b6ff959886697a4fc853c7

Co-authored-by: jjsjann123 <jiej@nvidia.com>
2022-07-21 17:24:08 +01:00
9160508852 [DataLoader] Locking lower ranks seed recipients (#81071) (#81071) (#81886)
Summary:
Exit the seed-receiving section only when all ranks have received the seed; otherwise we risk that the current rank
will reach the same section of the code again while rank zero is still in the previous iteration.

Fixes: #80845
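A hedged sketch of the synchronization pattern (illustrative names, not the actual DataLoader code; `store` stands for a torch.distributed TCPStore/PrefixStore shared by all ranks):

```python
import time

import torch

def receive_shared_seed(store, rank: int, world_size: int) -> int:
    if rank == 0:
        store.set("dl_shared_seed", str(torch.initial_seed()))
    seed = int(store.get("dl_shared_seed"))  # get() blocks until the key is set
    received = store.add("dl_shared_seed_recv_cnt", 1)
    while received < world_size:             # hold every rank here until all
        time.sleep(0.01)                     # ranks have checked in
        received = store.add("dl_shared_seed_recv_cnt", 0)  # non-blocking read
    return seed
```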

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81071
Approved by: https://github.com/msaroufim, https://github.com/ejguan

Test Plan:
contbuild & OSS CI, see e9b3bc2ead

Original Phabricator Test Plan:
Imported from OSS

Reviewed By: mehtanirav, ejguan

Differential Revision: D37702557

Pulled By: VitalyFedyunin

fbshipit-source-id: 51dd950e1bfc2c984a4ddbe6481e225023b0a202

Co-authored-by: Vitaly Fedyunin (Meta Employee) <vitalyf@fb.com>
2022-07-21 16:50:46 +01:00
60f9724e9a Change cudnn incompatibility message wording (#80877) (#80877) (#81881)
Summary:
Change cudnn incompatibility message wording
Please refer to: #80637

Test:
```
 File "/home/atalman/torch/backends/cudnn/__init__.py", line 67, in version
    if not _init():
  File "/home/atalman/torch/backends/cudnn/__init__.py", line 50, in _init
    raise RuntimeError(
RuntimeError: cuDNN version incompatibility: PyTorch was compiled  against (8, 3, 2) but found runtime version (8, 0, 3). PyTorch already comes bundled with cuDNN. One option to resolving this error is to ensure PyTorch can find the bundled cuDNN.Looks like your LD_LIBRARY_PATH contains incompatible version of cudnnPlease either remove it from the path or install cudnn (8, 3, 2)
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80877
Approved by: https://github.com/zou3519

Test Plan: contbuild & OSS CI, see a2ee1a92d6

Reviewed By: mehtanirav

Differential Revision: D37717040

Pulled By: atalman

fbshipit-source-id: 7cfc9e51999ccb9899e9ad78afdbd46f017a76bf
2022-07-21 16:41:28 +01:00
23ec48ce27 Make nn.stateless correctly reset parameters if the forward pass fails (#81262) (#81262) (#81880)
Summary:
This bug came up as I was adding new tests for ExpandedWeights

If the forward pass errors while the `_reparametrize_module` context manager is still on, the values from reparameterization will remain on the module outside of the context manager, where they should be the original values. This fixes that by putting a try/finally block around the forward call and the call to reset the parameters.
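A minimal, generic sketch of the pattern (the real `_reparametrize_module` also has to handle nn.Parameter and buffer slots specially):

```python
import contextlib

@contextlib.contextmanager
def swap_attributes(obj, replacements):
    # remember the original values before swapping in the replacements
    originals = {name: getattr(obj, name) for name in replacements}
    try:
        for name, value in replacements.items():
            setattr(obj, name, value)
        yield obj
    finally:  # runs even if the forward pass inside the `with` block raises
        for name, value in originals.items():
            setattr(obj, name, value)
```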

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81262
Approved by: https://github.com/zou3519

Test Plan: contbuild & OSS CI, see 56d1c75518

Reviewed By: DanilBaibak

Differential Revision: D37813203

Pulled By: samdow

fbshipit-source-id: 9c32485c074b10b985b35d2d575c35f16337af5f

Co-authored-by: samdow (Meta Employee) <samdow@fb.com>
2022-07-21 16:41:03 +01:00
1d8ea8366d Add 3.10 stdlib to torch.package (#81261) (#81261) (#81879)
Summary:
Copy-n-paste the list from https://github.com/PyCQA/isort/blob/main/isort/stdlibs/py310.py

Tested locally and in https://github.com/pytorch/pytorch/pull/81233

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81261
Approved by: https://github.com/suo

Test Plan: contbuild & OSS CI, see 9ed76c8c89

Reviewed By: DanilBaibak

Differential Revision: D37781957

Pulled By: malfet

fbshipit-source-id: e39d94335950022fbdbe7b053674136694b89fad

Co-authored-by: Nikita Shulga (Meta Employee) <nshulga@fb.com>
2022-07-21 16:40:32 +01:00
5525230fda [forward ad] Fix codegen to ignore undefined outputs (#81114) (#81114) (#81878)
Summary:
I don't think there's a way to avoid functions returning undefined tensors as outputs, so codegen will have to detect them before calling _set_fw_grad. Alternatively, we can just make calling _set_fw_grad with undefined self a no-op, but I'm biasing toward keeping _set_fw_grad more strict in case it is called in other areas.

Fixes https://github.com/pytorch/pytorch/issues/81111

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81114
Approved by: https://github.com/albanD

Test Plan: contbuild & OSS CI, see f69768fed4

Reviewed By: mehtanirav

Differential Revision: D37754419

Pulled By: soulitzer

fbshipit-source-id: ca5f2e703a838fa5cbc161604c5b98460456cdc0

Co-authored-by: soulitzer (Meta Employee) <soulitzer@gmail.com>
2022-07-21 16:39:45 +01:00
12954c729d Don't error if _warned_capturable_if_run_uncaptured not set (#80345) (#80345) (#81877)
Summary:
This can happen if an optimizer was pickled.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80345
Approved by: https://github.com/malfet, https://github.com/albanD

Test Plan: contbuild & OSS CI, see 57f001f35a

Reviewed By: b0noI

Differential Revision: D37523001

Pulled By: ezyang

fbshipit-source-id: 750884421d3f398695c24c351d8d6b26a501045a

Co-authored-by: Edward Z. Yang (Meta Employee) <ezyang@fb.com>
2022-07-21 16:37:50 +01:00
a93c901447 fix weight norm backward bug on CPU when OMP_NUM_THREADS <= 2 (#80930) (#80930) (#81872)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/80569.
Root cause: `weight_norm_backward_last_dim_kernel` creates a temp buffer of
size [num_threads, N] (N is the size of the last dimension of v) to do the vertical reduction.

To save an additional memory allocation, the original kernel reuses the buffer after
the vertical sum:
  1st row stores the final result of the sum
  2nd row stores coefficient a
  3rd row stores coefficient b

When OMP_NUM_THREADS <= 2, this causes an illegal memory access, since the buffer size
is only 1*N or 2*N.

The fix is to use a separate buffer (`a_b`) to store the coefficients a and b.
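A hedged NumPy illustration of why the reuse breaks down when fewer than three rows exist:

```python
import numpy as np

num_threads, N = 2, 5
buf = np.zeros((num_threads, N))   # the kernel's [num_threads, N] scratch buffer
buf[0] = buf.sum(axis=0)  # row 0: final vertical sum -- always in bounds
buf[1] = 1.0              # row 1: coefficient a -- needs num_threads >= 2
# buf[2] = 2.0            # row 2: coefficient b -- out of bounds here; in the
#                         # C++ kernel this is the illegal memory access
```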

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80930
Approved by: https://github.com/frank-wei, https://github.com/malfet

Test Plan: contbuild & OSS CI, see 6ee54a8780

Reviewed By: mehtanirav

Differential Revision: D37687546

Pulled By: mehtanirav

fbshipit-source-id: 5df39c9584f310722ae6901044f35c44d2c7c091

Co-authored-by: mingfeima <mingfei.ma@intel.com>
2022-07-21 16:37:21 +01:00
9d9bba4ce8 [Prims] Unbreak CUDA lazy init (#80899) (#80899) (#81870)
Summary:
CUDA calls should not be made in the default codepath

Fixes https://github.com/pytorch/pytorch/issues/80876

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80899
Approved by: https://github.com/ngimel

Test Plan: contbuild & OSS CI, see b62209f047

Reviewed By: mehtanirav

Differential Revision: D37648864

Pulled By: malfet

fbshipit-source-id: 9648e91cdcca96d9f76d873930e4ea2601bfb57d

Co-authored-by: Nikita Shulga (Meta Employee) <nshulga@fb.com>
2022-07-21 16:03:27 +01:00
0e43325ae9 Use fabi-version=11 to ensure compatibility between gcc7 and gcc9 binaries (#81058) (#81058) (#81884)
Summary:
Fixes: #80489

Test using cuda 11.3 manywheel binary:
```
import torch
print(torch.__version__)
print(torch._C._PYBIND11_BUILD_ABI)
```

Output
```
1.13.0.dev20220707+cu113
_cxxabi1011
```

Functorch test: torch 1.13.0.dev20220707+cu113, functorch built with cu102
```
import torch
print(torch.__version__)
print(torch._C._PYBIND11_BUILD_ABI)
from functorch import vmap
x = torch.randn(2, 3, 5)
vmap(lambda x: x, out_dims=3)(x)
```

Output
```
1.13.0.dev20220707+cu113
_cxxabi1011
/home/atalman/temp/testc1.py:5: UserWarning: Failed to initialize NumPy: No module named 'numpy' (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:73.)
  x = torch.randn(2, 3, 5)
Traceback (most recent call last):
  File "/home/atalman/temp/testc1.py", line 6, in <module>
    vmap(lambda x: x, out_dims=3)(x)
  File "/home/atalman/conda/lib/python3.9/site-packages/functorch/_src/vmap.py", line 361, in wrapped
    return _flat_vmap(
  File "/home/atalman/conda/lib/python3.9/site-packages/functorch/_src/vmap.py", line 488, in _flat_vmap
    return _unwrap_batched(batched_outputs, out_dims, vmap_level, batch_size, func)
  File "/home/atalman/conda/lib/python3.9/site-packages/functorch/_src/vmap.py", line 165, in _unwrap_batched
    flat_outputs = [
  File "/home/atalman/conda/lib/python3.9/site-packages/functorch/_src/vmap.py", line 166, in <listcomp>
    _remove_batch_dim(batched_output, vmap_level, batch_size, out_dim)
IndexError: Dimension out of range (expected to be in range of [-3, 2], but got 3)
```

Related Builder  PR: https://github.com/pytorch/builder/pull/1083

Test PR: https://github.com/pytorch/pytorch/pull/81232

Pull Request resolved: https://github.com/pytorch/pytorch/pull/81058
Approved by: https://github.com/zou3519, https://github.com/malfet

Test Plan: contbuild & OSS CI, see d552ba3b4f

Reviewed By: DanilBaibak

Differential Revision: D37813240

Pulled By: atalman

fbshipit-source-id: 94d94e777b0e9d5da106173c06117b3019ba71c4
2022-07-21 15:08:20 +01:00
b556fb30cb Allow register float16 weight_norm on cpu and speed up test (#80600) (#80600) (#81866)
Summary:
Fixes https://github.com/pytorch/pytorch/issues/80599

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80600
Approved by: https://github.com/malfet

Test Plan: contbuild & OSS CI, see c8d64ba5ec

Reviewed By: seemethere

Differential Revision: D37559049

Pulled By: albanD

fbshipit-source-id: 6a44fa9c8b898e2065cdb6b160b7279466f0dc7e

Co-authored-by: albanD (Meta Employee) <desmaison.alban@gmail.com>
2022-07-21 15:02:06 +01:00
1680cd0e46 Fix Module.share_memory error (#80843) (#80843) (#81867)
Summary:
Fixes #80733

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80843
Approved by: https://github.com/malfet

Test Plan: contbuild & OSS CI, see 4c279994fd

Reviewed By: mehtanirav

Differential Revision: D37619124

Pulled By: mehtanirav

fbshipit-source-id: 2b0d71d5a420d4aab286eea0b3cccdc96d15afeb

Co-authored-by: Kurt Mohler <kmohler@quansight.com>
2022-07-21 14:46:12 +01:00
868646748d Don't error if _warned_capturable_if_run_uncaptured not set (#80345) (#80345) (#81865)
Summary:
This can happen if an optimizer was pickled.

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80345
Approved by: https://github.com/malfet, https://github.com/albanD

Test Plan: contbuild & OSS CI, see 57f001f35a

Reviewed By: b0noI

Differential Revision: D37523001

Pulled By: ezyang

fbshipit-source-id: 750884421d3f398695c24c351d8d6b26a501045a

Co-authored-by: Edward Z. Yang (Meta Employee) <ezyang@fb.com>
2022-07-21 13:59:19 +01:00
cd6ec07348 remove overly restrictive checks for cudagraph (#80881) (#81858)
Finish fixing https://github.com/pytorch/pytorch/issues/80809
Pull Request resolved: https://github.com/pytorch/pytorch/pull/80881
Approved by: https://github.com/jbschlosser

Co-authored-by: albanD <desmaison.alban@gmail.com>
2022-07-21 13:58:13 +01:00
430416fc9c Fix distributed store to use add for the counter of DL shared seed (#80348) (#80348) (#81860)
Summary:
In order to get the result of `_shared_seed_recv_cnt` properly, switch from `store.get` to `store.add(key, 0)`.

See the comment from distributed team for the reason:
590d3e5774/torch/distributed/distributed_c10d.py (L242-L246)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/80348
Approved by: https://github.com/VitalyFedyunin, https://github.com/NivekT

Test Plan: contbuild & OSS CI, see 3ec9d34f21

Reviewed By: NivekT

Differential Revision: D37458370

Pulled By: ejguan

fbshipit-source-id: 386457bef43dbb47e3c5b8bb4524d456b5f4343a

Co-authored-by: erjia (Meta Employee) <erjia@fb.com>
2022-07-21 13:57:16 +01:00
db8ea2703e Remove overly restrictive assert in adam (#80222) (#81857)
This is causing issues if the user has the step on cuda for a good reason.

These asserts made code that used to run just fine start failing.
Note that keeping the step on CUDA is a pretty bad thing to do for performance though, so it is OK to try and push users away from doing it.

For the 1.12.1 milestone: this is not asking for a dot release to fix this (as this is bad practice anyways). But it would be a great thing to add if we do one: it is very low risk and will prevent breakage for users.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/80222
Approved by: https://github.com/jbschlosser, https://github.com/ngimel

Co-authored-by: albanD <desmaison.alban@gmail.com>
2022-07-21 13:55:13 +01:00
939019c162 Don't include libiomp with conda install on MacOS (#78632) (#78632) (#81873)
Summary:
Fixes #78490

The following command:
```
conda install pytorch torchvision torchaudio -c pytorch-nightly
```

Installs libiomp. Hence we don't want to package libiomp with conda installs. However, we still keep it for libtorch and wheels.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78632
Approved by: https://github.com/malfet

Test Plan: contbuild & OSS CI, see ca7f948806

Reviewed By: b0noI

Differential Revision: D36854265

Pulled By: atalman

fbshipit-source-id: 1b9a2f034cac822d9936febaa7b94213c31af19f
2022-07-21 13:50:01 +01:00
67ece03c8c Disable AVX512 CPU dispatch by default (#80253) (#80356)
As it can be slower, see https://github.com/pytorch/pytorch/issues/80252.
Update the trunk test matrix to test the AVX512 config in the `nogpu_AVX512` flavor.
Kill `nogpu_noAVX`, as AVX support was replaced with AVX512 when https://github.com/pytorch/pytorch/pull/61903 landed.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/80253
Approved by: https://github.com/ngimel

(cherry picked from commit 14813536a7120f1104be2270be341b7a383415c5)
2022-06-27 13:41:56 -04:00
bcfb424768 [JIT] Imbue stringbuf with C locale (#79929) (#79983)
To prevent 12345 from becoming "12,345" when the locale is not "C", as shown in the
following example:
```cpp

int main() {
  std::locale::global(std::locale("en_US.utf-8"));
  std::stringstream ss;
  ss << "12345 in " << std::locale().name()  << " locale is " << 12345 ;
  ss.imbue(std::locale("C"));
  ss << " but in C locale is " << 12345;
  std::cout << ss.str() << std::endl;
}

```

Fixes #79583

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79929
Approved by: https://github.com/davidberard98

Co-authored-by: Nikita Shulga <nshulga@fb.com>
2022-06-21 21:35:03 -04:00
8186aa7d6c [DataLoader] Share seed via Distributed Store to get rid of CUDA dependency (#79829) (#79890)
Fixes #79828

In a distributed environment, before this PR, DataLoader would create a Tensor holding the shared seed on RANK 0 and send the Tensor to the other processes. However, when `NCCL` is used as the distributed backend, the Tensor must be moved to cuda before being broadcast from RANK 0 to the other RANKs. And this causes the issue where DataLoader doesn't move the Tensor to cuda before sharing it using `NCCL`.

After offline discussion with @mrshenli, we think the distributed Store is a better solution as the shared seed is just an integer value. Then, we can get rid of the dependency on NCCL and CUDA when sharing info between distributed processes for DataLoader.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79829
Approved by: https://github.com/VitalyFedyunin, https://github.com/NivekT
2022-06-20 20:16:14 -04:00
01d9324fe1 nn: Disable nested tensor by default (#79884)
Better transformers (and by extension nested tensor) are identified as a
prototype feature and should not be enabled by default for the 1.12
release.

Signed-off-by: Eli Uriegas <eliuriegas@fb.com>
2022-06-20 17:15:16 -04:00
5009086150 Fix release doc builds (#79865)
This logic was lost during the last workflow migration, and as a result we do not have docs builds for the 1.12 release candidate, see pytorch/pytorch.github.io/tree/site/docs

Hattip to @brianjo for reminding me about the issue

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79865
Approved by: https://github.com/atalman, https://github.com/albanD, https://github.com/seemethere

(cherry picked from commit 2bfba840847e785b4da56498041421fc4929826b)
2022-06-20 11:27:32 -07:00
bfb6b24575 [JIT] Nested fix (#79480) (#79816)
Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79480
Approved by: https://github.com/davidberard98

Co-authored-by: Elias Ellison <eellison@fb.com>
2022-06-20 06:10:09 -07:00
681a6e381c [v1.12.0] Fix non-reentrant hooks based checkpointing (#79490)
* merge fix

* Test fix

* Lint
2022-06-17 14:41:52 -07:00
92437c6b4e Revert behavior of Dropout2d on 3D inputs to 1D channel-wise dropout behavior & warn (#79611)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79549

Approved by: https://github.com/ngimel, https://github.com/albanD

Co-authored-by: Joel Benjamin Schlosser <jbschlosser@fb.com>
2022-06-17 14:35:45 -04:00
566286f9db Add Dropout1d module (#79610)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79545

Approved by: https://github.com/ngimel, https://github.com/albanD

Co-authored-by: Joel Benjamin Schlosser <jbschlosser@fb.com>
2022-06-17 14:35:08 -04:00
ac3086120d [DataLoader] Fix the world_size when distributed sharding MapDataPipe (#79524) (#79550)
Fixes #79449

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79524
Approved by: https://github.com/NivekT, https://github.com/VitalyFedyunin
2022-06-15 06:23:03 -07:00
eqy
7964022214 Cherry pick tf32docs (#79537) (#79539)
* Update numerical_accuracy.rst

* Update numerical_accuracy.rst

* Update numerical_accuracy.rst

* lint
2022-06-15 06:21:18 -07:00
1d5ecdb3b9 Update PeachPy submodule (#78326)
Forked the repo, merged the latest changes into the pre-generated branch, and
updated the pregenerated opcodes.

Re-enabled NNPACK builds on MacOS

Picking f8ef1a3c0a  fixes https://github.com/pytorch/pytorch/issues/76094

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78326
Approved by: https://github.com/atalman, https://github.com/albanD

(cherry picked from commit fa7117c64a9cc740e71728701adb2cb2ccc143c4)
2022-06-15 06:12:50 -07:00
7eef782636 Link LazyLinalg with cusolver statically when needed (#79324) (#79522)
By copy-n-pasting the static linking logic from `libtorch_cuda` if
lazylinalg is not enabled

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79324
Approved by: https://github.com/atalman

Co-authored-by: Nikita Shulga <nshulga@fb.com>
2022-06-14 08:35:57 -07:00
fa01ea406a Add docs for Python Registration (#79481)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78753

Approved by: https://github.com/ezyang, https://github.com/albanD
2022-06-14 08:09:21 -04:00
21e1282098 [CUDA graphs] Allows Adam and AdamW to be capture-safe (#77862) (#79472)
Near term fix for https://github.com/pytorch/pytorch/issues/76368.

Q. Why does the user need to request `capturable=True` in the optimizer constructor? Why can't capture safety be completely automatic?
A. We need to set up capture-safe (device-side) state variables before capture. If we don't, and step() internally detects capture is underway, it's too late: the best we could do is create a device state variable and copy the current CPU value into it, which is not something we want baked into the graph.

Q. Ok, why not just do the capture-safe approach with device-side state variables all the time?
A. It incurs several more kernel launches per parameter, which could really add up and regress cpu overhead for ungraphed step()s. If the optimizer won't be captured, we should allow step() to stick with its current cpu-side state handling.

Q. But cuda RNG is a stateful thing that maintains its state on the cpu outside of capture and replay, and we capture it automatically. Why can't we do the same thing here?
A. The graph object can handle RNG generator increments because its capture_begin, capture_end, and replay() methods can see and access the generator object. But the graph object has no explicit knowledge of or access to the optimizer steps in its capture scope. We could let the user tell the graph object which optimizers will be stepped in its scope, i.e. something like
```python
graph.will_use_optimizer(opt)
graph.capture_begin()
...
```
but that seems clunkier than an optimizer constructor arg.

I'm open to other ideas, but right now I think constructor arg is necessary and the least bad approach.

Long term, https://github.com/pytorch/pytorch/issues/71274 is a better fix.
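A hedged usage sketch of the flag (warmup iterations, which are required before capture, are elided for brevity; see the CUDA Graphs docs for the full workflow):

```python
import torch

model = torch.nn.Linear(8, 8, device="cuda")
opt = torch.optim.Adam(model.parameters(), lr=1e-3, capturable=True)
static_input = torch.ones(16, 8, device="cuda")

g = torch.cuda.CUDAGraph()
opt.zero_grad(set_to_none=True)
with torch.cuda.graph(g):          # capture forward, backward, and step
    static_loss = model(static_input).sum()
    static_loss.backward()
    opt.step()

static_input.fill_(2.0)  # update the capture-time buffers in place, then replay
g.replay()
```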
Pull Request resolved: https://github.com/pytorch/pytorch/pull/77862
Approved by: https://github.com/ezyang
2022-06-14 08:06:07 -04:00
2d3d6f9d05 cherry-pick (#79455)
Co-authored-by: Mike Ruberry <mruberry@fb.com>
2022-06-13 18:52:31 -04:00
da93b1cbeb [CI] Turn flaky test signal to green (#79220) (#79220) (#79416)
Summary:
This implements the RFC #73573

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79220
Approved by: https://github.com/suo

Test Plan: contbuild & OSS CI, see 1bc8c87322

Reviewed By: osalpekar

Differential Revision: D37059423

Pulled By: osalpekar

fbshipit-source-id: c73d326e3aca834221cd003157f960e5bc02960a

Co-authored-by: Jane Xu (Meta Employee) <janeyx@fb.com>
2022-06-13 17:15:34 -04:00
d67c72cb53 Removing cublas static linking (#79280) (#79417)
Removing cublas static linking

Test:  https://github.com/pytorch/pytorch/runs/6837323424?check_suite_focus=true

```
(base) atalman@atalman-dev-workstation-d4c889c8-2k8hl:~/whl_test/torch/lib$ ldd libtorch_cuda.so
	linux-vdso.so.1 (0x00007fffe8f6a000)
	libc10_cuda.so (0x00007f6539e6a000)
	libcudart-80664282.so.10.2 (0x00007f6539be9000)
	libnvToolsExt-3965bdd0.so.1 (0x00007f65399df000)
	libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007f65397c0000)
	libc10.so (0x00007f653952f000)
	libtorch_cpu.so (0x00007f6520921000)
	libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007f6520583000)
	libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007f652037f000)
	libcublas.so.10 (0x00007f651c0c5000)
	librt.so.1 => /lib/x86_64-linux-gnu/librt.so.1 (0x00007f651bebd000)
	libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007f651bb34000)
	libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007f651b91c000)
	libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f651b52b000)
	/lib64/ld-linux-x86-64.so.2 (0x00007f656aa13000)
	libgomp-a34b3233.so.1 (0x00007f651b301000)
	libcublasLt.so.10 (0x00007f651946c000)
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79280
Approved by: https://github.com/seemethere
2022-06-13 16:48:13 -04:00
ef26f13df9 Install NDK 21 after GitHub update (#79024) (#79024) (#79429)
Summary:
See https://github.com/actions/virtual-environments/issues/5595

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79024
Approved by: https://github.com/janeyx99

Test Plan: contbuild & OSS CI, see 0be9df4e85

Reviewed By: osalpekar

Differential Revision: D36993242

Pulled By: kit1980

fbshipit-source-id: c2e76fee4eaf0b1474cb7221721cbb798c319001

Co-authored-by: Sergii Dymchenko (Meta Employee) <sdym@fb.com>
2022-06-13 15:07:45 -04:00
4a9779aa4d [DataPipe] Correcting deprecation version (#79309)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79302

Approved by: https://github.com/ejguan

Co-authored-by: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>
2022-06-10 15:23:18 -07:00
9a94ddc081 Fix _free_weak_ref error (#79315)
Fixes #74016

This is a cherry pick of  https://github.com/pytorch/pytorch/pull/78575 into release/1.12 branch
Approved by: https://github.com/ezyang
2022-06-10 15:14:11 -07:00
dee3dc6070 MPS: add layer_norm_backward (#79189) (#79276)
Layernorm backward

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79189
Approved by: https://github.com/razarmehr, https://github.com/albanD
2022-06-10 10:20:43 -07:00
30fce6836f Fix jit schema_matching ignoring self resulting in wrong operator schema (#79249)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79101

Approved by: https://github.com/gmagogsfm, https://github.com/eellison
2022-06-10 11:51:42 -04:00
0f93212516 adding a quick link to nvfuser README.md in jit doc for 1.12 release (#78160) (#79221)
Adding a link to the nvfuser README.md on the GitHub 1.12 release branch in the JIT doc.

Note that this PR is intended to be cherry-picked into the 1.12 release; we'll have a follow-up PR to update the link once this PR is merged.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78160
Approved by: https://github.com/davidberard98

Co-authored-by: jjsjann123 <alex.jann2012@gmail.com>
2022-06-09 14:33:59 -04:00
eqy
585417e935 [DDP] Cherrypick support other memory formats #79060 (#79071)
* check in

* add test
2022-06-09 14:25:17 -04:00
bd93fe635e Foward fix sharding bug for DL (#79124) (#79129)
This PR solves a bug introduced by #79041

`torch.utils.data.graph_settings.apply_sharding` changes the datapipe in-place and returns `None`

It would resolve the Error in TorchData. See: https://github.com/pytorch/data/actions/runs/2461030312
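A hedged illustration of the pitfall (the sharding arguments and pipeline are illustrative):

```python
from torch.utils.data.datapipes.iter import IterableWrapper
import torch.utils.data.graph_settings as graph_settings

dp = IterableWrapper(range(100)).sharding_filter()
# apply_sharding mutates the graph in place and (as of this release) returns
# None, so keep using the original reference afterwards.
graph_settings.apply_sharding(dp, 4, 0)         # num_of_instances=4, instance_id=0
# dp = graph_settings.apply_sharding(dp, 4, 0)  # wrong: dp would become None
print(len(list(dp)))  # this shard's elements
```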
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79124
Approved by: https://github.com/VitalyFedyunin
2022-06-09 14:21:33 -04:00
cc6e2d3035 Package config/template files with torchgen (#78942) (#79123)
Package config/template files with torchgen

This PR packages native_functions.yaml, tags.yaml and ATen/templates
with torchgen.

This PR:
- adds a step to setup.py to copy the relevant files over into torchgen
- adds a docstring for torchgen (so `import torchgen; help(torchgen)`
says something)
- adds a helper function in torchgen so you can get the torchgen root
directory (and figure out where the packaged files are)
- changes some scripts to explicitly pass the location of torchgen,
which will be helpful for the first item in the Future section.

Future
======

- torchgen, when invoked from the command line, should use sources
in torchgen/packaged instead of aten/src. I'm unable to do this because
people (aka PyTorch CI) invoke `python -m torchgen.gen` without
installing torchgen.
- the source of truth for all of these files should be in torchgen.
This is a bit annoying to execute on due to potential merge conflicts
and dealing with merge systems
- CI and testing. The way things are set up right now is really fragile,
we should have a CI job for torchgen.

Test Plan
=========
I ran the following locally:

```
python -m torchgen.gen -s torchgen/packaged
```
and verified that it outputted files.

Furthermore, I did a setup.py install and checked that the files are
actually being packaged with torchgen.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78942
Approved by: https://github.com/ezyang
2022-06-09 14:20:32 -04:00
127922d451 Fix sharding strategy for distributed DL (#79041) (#79063)
1. Change the sharding strategy from sharding by worker first, then by rank, to sharding by rank first, then by worker.
2. Change to fetch the rank and world size in the main process, for the sake of `spawn`.

For the change 1:
Before this PR, when the dataset cannot be evenly divided by `worker_num * world_size`, more data is retrieved by the workers on the first RANKs.
Using the following example:
- dataset size: 100
- world_size: 4
- num_worker: 2

The number of data retrieved by each rank before this PR
- Rank 0: 26
- Rank 1: 26
- Rank 2: 24
- Rank 3: 24

The number of data retrieved by each rank after this PR
- Rank 0: 25
- Rank 1: 25
- Rank 2: 25
- Rank 3: 25
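A small self-contained sketch reproducing the arithmetic above:

```python
# 100 samples, world_size=4, num_workers=2, round-robin sharding:
world_size, num_workers, n = 4, 2, 100

old = [0] * world_size  # shard by worker first, then by rank within each worker
new = [0] * world_size  # shard by rank first, then by worker within each rank
for i in range(n):
    old[(i // num_workers) % world_size] += 1
    new[i % world_size] += 1

print(old)  # [26, 26, 24, 24]
print(new)  # [25, 25, 25, 25]
```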

For the change 2:
Before this PR, `dist` functions were invoked inside the worker processes. That's fine when the worker processes are forked from the parent process: all environment variables are inherited and exposed to these `dist` functions. However, when the worker processes are spawned, they won't be able to access these environment variables, so the dataset won't be sharded by rank.
After this PR, `_sharding_worker_init_fn` should work for both the `spawn` and `fork` cases.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/79041
Approved by: https://github.com/VitalyFedyunin, https://github.com/NivekT
2022-06-07 21:24:27 -04:00
4c3742be4b Add check for no grad in transformer encoder nestedtensor conversion (#78832) (#78832) (#79029)
Summary:
Before, we allowed inputs with grad to be converted to NestedTensors. Autograd attempts to find the size of the NestedTensor, but NestedTensor throws an exception from its size function. This causes all calls to nn.TransformerEncoder with grad enabled to fail.

Fix: we add a check for no grad in transformer encoder so we do not convert a tensor with grad to a NestedTensor.
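A hedged sketch of the added guard (the helper name is hypothetical; the real check sits inline in the encoder's forward before the NestedTensor conversion):

```python
import torch

def may_convert_to_nested(src: torch.Tensor) -> bool:
    # Only take the NestedTensor fastpath when autograd cannot observe the
    # intermediate (NestedTensor had no size() for autograd to query).
    return not (src.requires_grad and torch.is_grad_enabled())
```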

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78832
Approved by: https://github.com/cpuhrsch, https://github.com/jbschlosser

Test Plan: contbuild & OSS CI, see 1f819ee965

Reviewed By: frank-wei, mikekgfb

Differential Revision: D36907614

Pulled By: erichan1

fbshipit-source-id: 576be36530da81c1eff59ac427ae860bfb402106
2022-06-07 21:23:27 -04:00
f12a1ff7f9 [1.12][DataPipe] Disable profiler for IterDataPipe by default and add deprecation of functional DataPipe names (#79027)
* [DataPipe] Disable profiler for IterDataPipe by default

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78674

Approved by: https://github.com/VitalyFedyunin

* [DataPipe] Add function for deprecation of functional DataPipe names

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78970

Approved by: https://github.com/ejguan
2022-06-07 17:48:48 -04:00
f913b4d9fb [quant] Skip some broken tests due to hypothesis
Summary:
Some quantization tests failed even though we didn't touch any code related to the tests; all of them
use hypothesis, so it's likely that hypothesis is the problem. We will skip these tests for now and
gradually remove all hypothesis tests from the quantization test code, or skip running the hypothesis tests in CI.

Test Plan:
ossci

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78302

Approved by: https://github.com/suo, https://github.com/dzdang

(cherry picked from commit 716f76716a842482947efbdb54ea6bf6de3577e1)
2022-06-07 10:05:29 -07:00
9229e451b2 Guard test_sparse_csr.test_mm on CUDA11+ (#77965)
Fixes #77944

Pull Request resolved: https://github.com/pytorch/pytorch/pull/77965
Approved by: https://github.com/albanD, https://github.com/malfet

(cherry picked from commit a8467de6fa1657a6f2b3f0b426a873c6e98ce5ce)
2022-06-07 10:02:31 -07:00
d064733915 Fix coreml ios workflow (#78356)
Which was broken by the https://pypi.org/project/protobuf/4.21.0/ release.
Fix by installing a pinned version of coremltools with a pinned version of protobuf.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78356
Approved by: https://github.com/atalman

(cherry picked from commit a4723d5a5f11f974f9d9ccd83564f3de5de818c5)
2022-06-07 09:51:50 -07:00
9d67727edf [FSDP][Docs] Fix typo in full_optim_state_dict() (#78981)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78784

Approved by: https://github.com/rohan-varma
2022-06-07 10:10:32 -04:00
ec86ed25e9 Run MPS tests (#78723)
This adds a workflow, that is executed on MacOS 12.3+ machines and runs just test_mps.py
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78723
Approved by: https://github.com/albanD, https://github.com/kulinseth

(cherry picked from commit f7ac389e71e55f84651141c01334dea668b3f90c)
2022-06-07 06:57:52 -07:00
2deba51e72 [MPS] Do not pass linker command to a compiler (#78630)
`-weak_framework` is a linker rather than a compiler option, and as such
it should not be passed as a CXX flag.
Also, use `string(APPEND` rather than `set(FOO "$(FOO) ...)`

Likely fixes our ability to use `sccache` for MacOS CI builds, see https://github.com/pytorch/pytorch/issues/78375#issuecomment-1143697183
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78630
Approved by: https://github.com/albanD

(cherry picked from commit 634954c55c05b0c0905b2299308dd9152e08af92)
2022-06-07 06:56:57 -07:00
e9a12ec87f update mps note with more details (#78669)
Follow up to the comments in https://github.com/pytorch/pytorch/pull/77767#pullrequestreview-978807521
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78669
Approved by: https://github.com/kulinseth, https://github.com/anjali411

(cherry picked from commit b30b1f3decfd2b51ac2250b00a8ae7049143d855)
2022-06-07 06:53:59 -07:00
2a8e3ee91e Update codeowners for MPS (#78727)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78727
Approved by: https://github.com/malfet

(cherry picked from commit 48c3d8573918cf47f8091e9b5e7cea7aa0785ad4)
2022-06-07 06:52:53 -07:00
47d558e862 [MPS] Add arange_mps_out implementation (#78789)
Mostly by factoring out shader logic from `linspace_out_mps` implementation
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78789
Approved by: https://github.com/albanD, https://github.com/kulinseth
2022-06-07 06:52:17 -07:00
bc0a9abad2 MPS: Fix issues with view tensors and linspace. (#78690)
Fixes: https://github.com/pytorch/pytorch/issues/78642, https://github.com/pytorch/pytorch/issues/78511
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78690
Approved by: https://github.com/razarmehr, https://github.com/DenisVieriu97

(cherry picked from commit 4858c56334aa2b09b1ba10d0a3547ef01edda363)
2022-06-07 06:51:17 -07:00
fa7d872ce3 MPS: add linspace op (#78570)
Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78570
Approved by: https://github.com/malfet

(cherry picked from commit a3bdafece3a07aea186e34abc28e2540aa078393)
2022-06-07 06:51:08 -07:00
d1d2be89fd Add test case for issue: https://github.com/pytorch/pytorch/issues/77851 (#78547)
The test works fine now.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78547
Approved by: https://github.com/kulinseth

(cherry picked from commit aa62b3e003b53a0b36e04005fe5fdc8e2dda0253)
2022-06-07 06:50:53 -07:00
0e58e3374e MPS: Implement aten::count_nonzero.dim_IntList (#78169)
- See: #77764

Implements the `aten::count_nonzero.dim_IntList` operator (as used by [torch.count_nonzero](https://pytorch.org/docs/stable/generated/torch.count_nonzero.html)) for [MPS](https://pytorch.org/blog/introducing-accelerated-pytorch-training-on-mac/).

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78169
Approved by: https://github.com/malfet, https://github.com/kulinseth, https://github.com/albanD

(cherry picked from commit f42b42d3eb9af4ea1d09f00a13e9b6dc9efcc0f8)
2022-06-07 06:50:41 -07:00
e3e753161c MPS: Fix crashes in view tensors due to buffer size mismatch (#78496)
Fixes #78247, #77886

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78496
Approved by: https://github.com/albanD, https://github.com/malfet

(cherry picked from commit 017b0ae9431ae3780a4eb9bf6d8865dfcd02cd92)
2022-06-07 06:50:33 -07:00
dc2b2f09d7 Speed up test_mps from 9min to 25s
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78488

Approved by: https://github.com/kulinseth

(cherry picked from commit bde246fcc60372c0ce7ee16dd5e3dc7652a36867)
2022-06-07 06:50:20 -07:00
19ebdd7eab Remove prints and add proper asserts
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78454

Approved by: https://github.com/kulinseth

(cherry picked from commit 02551a002575d1a40d6a6c7d6c7f319ef1b3ad2f)
2022-06-07 06:50:13 -07:00
f8160b113e MPS: Fixes the as_strided_mps implementation for contiguous view operations (#78440)
Fixes https://github.com/pytorch/pytorch/issues/78107; https://github.com/pytorch/pytorch/issues/77750

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78440
Approved by: https://github.com/malfet

(cherry picked from commit d63db52349ae3cffd6f762c9027e7363a6271d27)
2022-06-07 06:50:04 -07:00
3e8119bf9a MPS: Fix the memory growing issue and BERT_pytorch network crash fix. (#78006)
Fixes #77753

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78006
Approved by: https://github.com/albanD

(cherry picked from commit cbdb694f158b8471d71822873c3ac130203cc218)
2022-06-07 06:49:56 -07:00
6660df9f22 [MPS] Fix copy_kernel_mps (#78428)
By passing the `storage_offset` of the source and destination Tensors.
This fixes the following simple use case:
```
python3 -c "import torch;x=torch.zeros(3, 3, device='mps'); x[1, 1]=1;print(x)"
```

Add a test to validate it does not regress in the future.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78428
Approved by: https://github.com/kulinseth

(cherry picked from commit 437ecfc4612b73ada1f99de94f3c79de6b08f99a)
2022-06-07 06:43:55 -07:00
8b7e19a87b MPS: Eye op (#78408)
This can be used as a reference PR for how to add an op to the MPS backend.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78408
Approved by: https://github.com/albanD

(cherry picked from commit 8552acbd7435eadb184e0cedc21df64d3bf30329)
2022-06-07 06:43:48 -07:00
9828013233 [mps] Do not use malloc/free in Indexing.mm (#78409)
Allocating just two int64s on the heap is somewhat wasteful (and they are leaked if the function returns early).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78409
Approved by: https://github.com/seemethere, https://github.com/kulinseth

(cherry picked from commit aefb4c9fba0edf5a71a245e4fd8f5ac1d65beeac)
2022-06-07 06:43:41 -07:00
53fc6dc3db MPS: Add adaptive max pool2d op (#78410)
Adaptive max pool 2d forward and backward with test

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78410
Approved by: https://github.com/albanD

(cherry picked from commit 2e32d5fcd8de75dc2695d940925e5be181a06b54)
2022-06-07 06:43:33 -07:00
52435c6b1f MPS: add ranked tensors for addcmul ops instead of constants and update version_check (#78354)
This is a reland of https://github.com/pytorch/pytorch/pull/78312 with a syntax error and formatting fixed in `MPSDevice.mm`
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78354
Approved by: https://github.com/kulinseth

(cherry picked from commit 45462baf7e2ef00a9aa912e2a045b20bc3ed80d3)
2022-06-07 06:43:21 -07:00
9a66061326 Fix the MPS Heap volatility (#78230)
Fixes #77829
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78230
Approved by: https://github.com/malfet

(cherry picked from commit c8ab55b2939c4cd5cd8d2e0605fdfd09e8eff294)
2022-06-07 06:42:00 -07:00
eef0ec541e Use random seed in normal_mps_out (#78010)
Fixes #78009.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78010
Approved by: https://github.com/kulinseth

(cherry picked from commit 51c4c79e3d4e600baa6a53a2afcf99ef8db5dbe0)
2022-06-07 06:41:26 -07:00
0ffefea581 Fix typo in testname (#78258)
`test_linear2D_no_bias_backwarwd` -> `test_linear2D_no_bias_backward`

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78258
Approved by: https://github.com/kulinseth, https://github.com/janeyx99

(cherry picked from commit 705082656a9dd2c9f243da01f28a195b94b24d66)
2022-06-07 06:41:18 -07:00
7e12cfb29d [MPS] Lazy initialize allocators (#78227)
Do not construct MPS allocators at load time, but rather create them
lazily when needed

This significantly reduces `libtorch.dylib` load time and prevents the weird
flicker during `import torch` when an Intel MacBook switches from
integrated to discrete graphics.

Before the change `python3 -c "import timeit;import importlib;print(timeit.timeit(lambda: importlib.import_module('torch'), number=1))"` takes about 1 sec, after the change it drops down to .6 sec

Minor changes:
 - Deleted unused `__block id<MTLBuffer> buf = nil;` from
   HeapAllocatorImpl
 - Add braces for single line if statements

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78227
Approved by: https://github.com/kulinseth, https://github.com/albanD

(cherry picked from commit 2679aa47897232827771ad7bb18e14bb4be3cae8)
2022-06-07 06:41:09 -07:00
24b9bd4398 [MPS] Add version check (#78192)
Use `instancesRespondToSelector:` to test the presence of
`optimizationLevel` in `MPSGraphCompilationDescriptor`, which according
to
https://developer.apple.com/documentation/metalperformanceshadersgraph/mpsgraphcompilationdescriptor/3922624-optimizationlevel
is only available on 12.3 or newer

This works around a limitation of the `@available(macOS 12.3, *)` macro in
shared libraries dynamically loaded by apps targeting an older runtime.
And the deployment target for macOS Python conda binaries is 10.14:
```
% otool -l `which python3`
...
Load command 9
      cmd LC_BUILD_VERSION
  cmdsize 32
 platform 1
    minos 10.14
      sdk 10.14
...
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78192
Approved by: https://github.com/atalman, https://github.com/seemethere

(cherry picked from commit b7bb34d7625d95e5088638721dcc07c2bc5e2ade)
2022-06-07 06:41:02 -07:00
5342e76039 Convert MPS Tensor data using MPSGraph API (#78092)
Fixes #78091
If you are already working on this, simply disregard this or take what may be helpful. This is my attempt at MPS-native Tensor datatype conversion. It works for everything tested ~~but is currently only implemented for MPS-to-MPS copy, not MPS-to-X or X-to-MPS, but the same approach could easily be used~~.

Before:
```python
In [5]: pt.full((40,), -10.3, device="mps")
Out[5]:
tensor([-10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000], device='mps:0')

In [6]: pt.full((40,), -10.3, device="mps").int()
Out[6]:
tensor([-1054552883, -1054552883, -1054552883, -1054552883, -1054552883,
        -1054552883, -1054552883, -1054552883, -1054552883, -1054552883,
        -1054552883, -1054552883, -1054552883, -1054552883, -1054552883,
        -1054552883, -1054552883, -1054552883, -1054552883, -1054552883,
        -1054552883, -1054552883, -1054552883, -1054552883, -1054552883,
        -1054552883, -1054552883, -1054552883, -1054552883, -1054552883,
        -1054552883, -1054552883, -1054552883, -1054552883, -1054552883,
        -1054552883, -1054552883, -1054552883, -1054552883, -1054552883],
       device='mps:0', dtype=torch.int32)

In [7]: pt.full((40,), -10.3, device="mps").int().float()
Out[7]:
tensor([-10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000], device='mps:0')

In [8]: pt.full((40,), -10.3, device="mps").int().float().bool()
Out[8]:
tensor([ True, False, False,  True,  True, False, False,  True,  True, False,
        False,  True,  True, False, False,  True,  True, False, False,  True,
         True, False, False,  True,  True, False, False,  True,  True, False,
        False,  True,  True, False, False,  True,  True, False, False,  True],
       device='mps:0')
```

After:
```python
In [3]: pt.full((40,), -10.3, device="mps")
Out[3]:
tensor([-10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000, -10.3000,
        -10.3000, -10.3000, -10.3000, -10.3000, -10.3000], device='mps:0')

In [4]: pt.full((40,), -10.3, device="mps").int()
Out[4]:
tensor([-10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,
        -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,
        -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10],
       device='mps:0', dtype=torch.int32)

In [5]: pt.full((40,), -10.3, device="mps").int().float()
Out[5]:
tensor([-10., -10., -10., -10., -10., -10., -10., -10., -10., -10., -10., -10.,
        -10., -10., -10., -10., -10., -10., -10., -10., -10., -10., -10., -10.,
        -10., -10., -10., -10., -10., -10., -10., -10., -10., -10., -10., -10.,
        -10., -10., -10., -10.], device='mps:0')

In [6]: pt.full((40,), -10.3, device="mps").int().float().bool()
Out[6]:
tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True], device='mps:0')
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78092
Approved by: https://github.com/kulinseth, https://github.com/malfet

(cherry picked from commit a52bfe2c5d8588b8f9e83e0beecdd18a1d672d0e)
2022-06-07 06:40:54 -07:00
08d70ab718 [MPS] Fix torch.mps.is_available() (#78121)
By introducing `at::mps::is_available()` and changing `torch._C._is_mps_available` from a property to a memoizable callable

Also, if `_mtl_device` is released in the MPSDevice destructor, shouldn't it be retained in the constructor?

Looks like the GitHub Actions Mac runner does not have any Metal devices available, according to https://github.com/malfet/deleteme/runs/6560871657?check_suite_focus=true#step:3:15
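A minimal sketch of the property-to-memoized-callable shape (Python-side illustration only; the actual device query lives in C++):
```python
import functools

def _probe_metal_devices() -> bool:
    # Hypothetical stand-in for the native at::mps::is_available() query.
    return False

@functools.lru_cache(maxsize=1)
def is_available() -> bool:
    # Memoized callable: the device probe runs at most once, on first call,
    # rather than eagerly when the module is imported.
    return _probe_metal_devices()
```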

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78121
Approved by: https://github.com/albanD

(cherry picked from commit 6244daa6a9a27463f63235d88b9f728c91243a08)
2022-06-07 06:40:23 -07:00
207bde1ee8 Add ignore for -Wunsupported-availability-guard
This failed internal builds, so this just upstreams the internal fix.

Signed-off-by: Eli Uriegas <eliuriegas@fb.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/77995

Approved by: https://github.com/bigfootjon, https://github.com/malfet

(cherry picked from commit a9a99a901e953cf35edd010e17ee0ed4d2f347af)
2022-06-07 06:40:18 -07:00
51428a8f43 Fix a few issues on assert/double error/legacy constructor (#77966)
Fixes https://github.com/pytorch/pytorch/issues/77960, https://github.com/pytorch/pytorch/issues/77957, https://github.com/pytorch/pytorch/issues/77781
Pull Request resolved: https://github.com/pytorch/pytorch/pull/77966
Approved by: https://github.com/soulitzer, https://github.com/kulinseth

(cherry picked from commit 04ac80c73a9f525322a8b622659a27ad065698ea)
2022-06-07 06:38:32 -07:00
c40f18454d [DataLoader] Apply sharding settings in dist when num_workers is 0 (#78967)
ghstack-source-id: 9c53e8c9adb3ac7c80ebf22a476385509b252511
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78950

Co-authored-by: Vitaly Fedyunin <vitaly.fedyunin@gmail.com>
2022-06-07 09:15:35 -04:00
8a5156a050 [DataPipe] Adding functional API for FileLister (#78419) (#78948)
Fixes #78263

Follow-up from pytorch/data#387. This adds a functional API `list_files()` to `FileListerDataPipe`.
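A usage sketch, assuming the 1.12 datapipes API (the root path is illustrative):
```python
from torch.utils.data.datapipes.iter import IterableWrapper

# FileLister previously had to be constructed explicitly; with the new
# functional form it chains like any other datapipe operation.
dp = IterableWrapper(["."]).list_files()
for path in dp:
    print(path)
```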

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78419
Approved by: https://github.com/NivekT, https://github.com/ejguan

Co-authored-by: Robert Xiu <xiurobert@gmail.com>
2022-06-07 09:13:09 -04:00
04d75d2008 Make ShufflerDataPipe deterministic for persistent DL and distributed DL (#78765) (#78927)
Fixes https://github.com/pytorch/data/issues/426

This PR introduces two main changes:
- It ensures the `ShufflerDataPipe` shares the same seed across distributed processes.
- Users can reset `shuffle` for persistent workers per epoch.

Detail:
- `shared_seed` is shared across distributed and worker processes. It will seed a `shared_rng` to provide seeds to each `ShufflerDataPipe` in the pipeline
- `worker_loop` now accepts a new `shared_seed` argument
- The `shared_seed` is attached to `_ResumeIteration` to reset the seed per epoch for persistent workers
- I chose not to touch `base_seed`, simply to avoid BC issues

I used this [script](https://gist.github.com/ejguan/d88f75fa822cb696ab1bc5bc25844f47) to test the result with `world_size=4`. Please check the result in: https://gist.github.com/ejguan/6ee2d2de12ca57f9eb4b97ef5a0e300b

You can see there isn't any duplicated/missing element for each epoch. And, with the same seed, the order of data remains the same across epochs.
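A minimal sketch of the scheme (names illustrative, not the actual implementation): one shared seed drives a shared RNG, which hands every `ShufflerDataPipe` its per-epoch seed, so all ranks shuffle identically.
```python
import random

def seed_shufflers(shufflers, shared_seed: int) -> None:
    # Every rank and worker receives the same shared_seed, so each derives
    # identical per-shuffler seeds and therefore an identical data order.
    shared_rng = random.Random(shared_seed)
    for dp in shufflers:
        dp.set_seed(shared_rng.randint(0, 2**63 - 1))  # hypothetical setter
```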
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78765
Approved by: https://github.com/VitalyFedyunin
2022-06-07 09:03:26 -04:00
2652da29ab Avoid CPU Sync in SyncBatchNorm When Capturing CUDA Graphs (#78810)
We recently updated `SyncBatchNorm` to support empty input batches.
The new code removes stats from ranks with empty inputs. However,
this change breaks CUDA graph capture as it forces CPU sync. This
commit uses `is_current_stream_capturing()` to guard the new code
path, and only runs the new code when not capturing CUDA Graphs. To
support empty inputs with CUDA graph capturing, we might need to
update CUDA kernels for `batch_norm_backward_elemt` and
`batch_norm_gather_stats_with_counts`. See #78656.
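The guard amounts to something like this simplified sketch (the real logic sits inside SyncBatchNorm's forward; names are illustrative):
```python
import torch

def maybe_drop_empty_ranks(counts: torch.Tensor) -> torch.Tensor:
    # Boolean-mask filtering has a data-dependent output shape, which
    # forces a CPU sync and is therefore illegal during graph capture.
    if not torch.cuda.is_current_stream_capturing():
        counts = counts[counts != 0]
    return counts
```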

Fixes #78549

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78666

Approved by: https://github.com/albanD
2022-06-06 09:39:03 -04:00
aa8911885b [chalf] warn once on creating a chalf tensor (#78245) (#78710)
`chalf` is experimental as the op coverage is low.

The following script raises 6 warnings with `set_warn_always(True)`, and only 1 warning otherwise.
```python
import torch
torch.set_warn_always(True)
device='cpu'
t = torch.randn(3, dtype=torch.chalf, device=device)
y = torch.rand(3, dtype=torch.chalf, device=device)
# Allocates new tensor for result
t + y

device='cuda'
t = torch.randn(3, dtype=torch.chalf, device=device)
y = torch.rand(3, dtype=torch.chalf, device=device)

# Allocates new tensor for result
t + y

```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78245
Approved by: https://github.com/anjali411
2022-06-03 10:51:18 -04:00
528710ec89 [DataLoader] DataLoader now automatically applies sharding to DataPipes (#78762)
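A sketch of what applying sharding means for a pipeline, assuming the 1.12 `sharding_filter` datapipe: each DataLoader worker keeps only its own disjoint slice of the stream, with no duplicates across workers.
```python
from torch.utils.data import DataLoader
from torch.utils.data.datapipes.iter import IterableWrapper

# sharding_filter marks where the stream may be split across workers;
# DataLoader now calls apply_sharding on the pipeline automatically.
dp = IterableWrapper(range(8)).sharding_filter()
loader = DataLoader(dp, num_workers=2)
assert sorted(int(x) for x in loader) == list(range(8))
```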
ghstack-source-id: ac918b064cd09cd68a04c28238481c76b46b4010
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78631

Co-authored-by: Vitaly Fedyunin <vitaly.fedyunin@gmail.com>
2022-06-03 10:21:55 -04:00
de53f70e1d [GHA] attempt to re-enable mac test workflows (#78000) (#78749)
Our Mac tests have not been running since #77645 because of the failure shown in [this screenshot](https://user-images.githubusercontent.com/31798555/169602783-988a265a-ce4a-41a7-8f13-3eb4615b0d6f.png).

https://github.com/pytorch/pytorch/actions/runs/2345334995
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78000
Approved by: https://github.com/malfet

Co-authored-by: Jane Xu <janeyx@fb.com>
2022-06-02 15:25:22 -04:00
39ebb3e06e fix set item to scalar tensor missing gradient info (#78746)
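A minimal repro of the expected behavior after the fix (values illustrative):
```python
import torch

s = torch.tensor(2.0, requires_grad=True)
x = torch.zeros(3)
x[0] = s              # setitem with a scalar tensor must keep gradient info
x.sum().backward()
assert s.grad == 1.0  # the gradient flows back through the assigned element
```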
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78246

Approved by: https://github.com/ngimel
2022-06-02 15:04:36 -04:00
fd3cc823ce [DataPipe] Lazily generate exception message for performance (#78726)
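The general pattern, sketched below (not the actual DataPipe code): defer building the message string until it is actually displayed.
```python
class LazyMessageError(Exception):
    def __init__(self, make_msg):
        super().__init__()
        self._make_msg = make_msg  # callable; formatting is deferred

    def __str__(self):
        return self._make_msg()   # pay the formatting cost only when shown

# The expensive repr is computed only if the error is ever printed:
# raise LazyMessageError(lambda: f"unexpected element: {element!r}")
```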
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78673

Approved by: https://github.com/ejguan
2022-06-02 14:26:38 -04:00
5bb7c617f6 [docs][nn] conv: complex support note (#78351) (#78709)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78351
Approved by: https://github.com/anjali411, https://github.com/jbschlosser
2022-06-02 14:23:29 -04:00
8a627381c9 [DataLoader] Minor documentation improvement (#78548)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78404

Approved by: https://github.com/ejguan
2022-06-01 14:05:54 -04:00
f56e16a70f [ONNX] Fix typo when comparing DeviceObjType (#78085) (#78370)
#77423 introduced a typo in

1db9be70a7/torch/onnx/symbolic_opset9.py (L5012-L5017)

where the string `DeviceObjType` was replaced with `_C.DeviceObjType`. This PR reverts the changes to the strings.

**Tested:**

With torchvision,

```
pytest test/test_onnx.py::TestONNXExporter::test_mask_rcnn
pytest -n auto test/test_onnx.py::TestONNXExporter
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/78085
Approved by: https://github.com/datumbox, https://github.com/BowenBao, https://github.com/ezyang

Co-authored-by: Justin Chu <justinchuby@users.noreply.github.com>
2022-05-26 13:05:36 -07:00
c93a7f8bea Update PyTorch/XLA git clone branch name for 1.12 (#78315) 2022-05-25 16:06:39 -07:00
919b53c5e7 [Profiler] Fix segfault in AppendOnlyList (#78084) 2022-05-24 17:21:45 -04:00
2ad18abc49 [MPS] Initialize MPSDevice::_mtl_device property to nil (#78136) (#78204)
This prevents `import torch` from accidentally crashing on machines with no Metal devices

Should prevent crashes reported in https://github.com/pytorch/pytorch/pull/77662#issuecomment-1134637986 and https://github.com/pytorch/functorch/runs/6560056366?check_suite_focus=true

Backtrace to the crash:
```
(lldb) bt
* thread #1, stop reason = signal SIGSTOP
  * frame #0: 0x00007fff7202be57 libobjc.A.dylib`objc_msgSend + 23
    frame #1: 0x000000010fd9f524 libtorch_cpu.dylib`at::mps::HeapAllocator::MPSHeapAllocatorImpl::MPSHeapAllocatorImpl() + 436
    frame #2: 0x000000010fda011d libtorch_cpu.dylib`_GLOBAL__sub_I_MPSAllocator.mm + 125
    frame #3: 0x000000010ada81e3 dyld`ImageLoaderMachO::doModInitFunctions(ImageLoader::LinkContext const&) + 535
    frame #4: 0x000000010ada85ee dyld`ImageLoaderMachO::doInitialization(ImageLoader::LinkContext const&) + 40
(lldb) up
frame #1: 0x000000010fd9f524 libtorch_cpu.dylib`at::mps::HeapAllocator::MPSHeapAllocatorImpl::MPSHeapAllocatorImpl() + 436
libtorch_cpu.dylib`at::mps::HeapAllocator::MPSHeapAllocatorImpl::MPSHeapAllocatorImpl:
->  0x10fd9f524 <+436>: movq   %rax, 0x1b0(%rbx)
    0x10fd9f52b <+443>: movw   $0x0, 0x1b8(%rbx)
    0x10fd9f534 <+452>: addq   $0x8, %rsp
    0x10fd9f538 <+456>: popq   %rbx
(lldb) disassemble
 ...
    0x10fd9f514 <+420>: movq   0xf19ad15(%rip), %rsi     ; "maxBufferLength"
    0x10fd9f51b <+427>: movq   %r14, %rdi
    0x10fd9f51e <+430>: callq  *0xeaa326c(%rip)          ; (void *)0x00007fff7202be40: objc_msgSend
```

which corresponds to the `[m_device maxBufferLength]` call, where `m_device` is not initialized in
2ae3c59e4b/aten/src/ATen/mps/MPSAllocator.h (L171)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/78136
Approved by: https://github.com/seemethere

Co-authored-by: Nikita Shulga <nshulga@fb.com>
2022-05-24 17:03:38 -04:00
9596b999f8 Fix unit tests (#78056) 2022-05-24 17:00:57 -04:00
baabb4cb96 MPS: Add back the memory leak fixes. (#77964) (#78198)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/77964
Approved by: https://github.com/albanD

Co-authored-by: Kulin Seth <kulinseth@gmail.com>
2022-05-24 16:36:16 -04:00
906a6e1df9 Fixing release rc build names (#78174) 2022-05-24 15:04:36 -04:00
974f7f8080 [1.12] Remove torch.vmap (#78021) 2022-05-23 10:30:23 -07:00
8abf37d74e ci: Pin builder to release/1.12 (#77986)
Signed-off-by: Eli Uriegas <eliuriegas@fb.com>
2022-05-20 14:00:44 -04:00
8ff2bc0c01 Release 1.12 Install torch from test channel, Pin builder and xla repo (#77983) 2022-05-20 10:51:22 -07:00
a119b7f6d4 retry - enable NVFuser by default
Enable NVFuser in OSS.

Retry of #77213, because it was breaking torchvision tests.

The fix in #77471 has been verified by jjsjann123.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/77579

Approved by: https://github.com/eellison, https://github.com/malfet, https://github.com/atalman, https://github.com/seemethere
2022-05-20 10:31:49 -07:00
194 changed files with 7985 additions and 2190 deletions

@ -62,7 +62,7 @@ git --no-pager log --max-count 1
popd
# Clone the Builder master repo
retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
retry git clone -q https://github.com/pytorch/builder.git -b release/1.12 "$BUILDER_ROOT"
pushd "$BUILDER_ROOT"
echo "Using builder from "
git --no-pager log --max-count 1

@ -12,4 +12,6 @@ self-hosted-runner:
- windows.8xlarge.nvidia.gpu
- bm-runner
- linux.rocm.gpu
- macos-12-xl
- macos-12
- macos12.3-m1

@ -8,7 +8,7 @@
# NOTE: If testing pytorch/builder changes you can change this variable to change what pytorch/builder reference
# the binary builds will check out
{%- set builder_branch = "main" -%}
{%- set builder_branch = "release/1.12" -%}
{%- macro concurrency(build_environment) -%}
concurrency:

@ -42,7 +42,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
@ -57,8 +56,8 @@ jobs:
!{{ upload.binary_env(config) }}
steps:
!{{ common.setup_ec2_linux() }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch, checkout_pr_head=False) }}
{%- if config["gpu_arch_type"] == 'cuda' and config["gpu_arch_version"].startswith('11') %}
- name: Set BUILD_SPLIT_CUDA
run: |
@ -130,8 +129,8 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch, checkout_pr_head=False) }}
{%- if config["gpu_arch_type"] == "rocm" %}
- name: ROCm set GPU_FLAG
run: |

@ -64,7 +64,7 @@ jobs:
{%- if config["package_type"] == "libtorch" %}
runs-on: macos-10.15
{%- else %}
runs-on: macos-12
runs-on: macos-12-xl
{%- endif %}
{%- if config["package_type"] == "libtorch" %}
# libtorch builds take a long time on github hosted runners
@ -85,8 +85,8 @@ jobs:
chmod +x "${RUNNER_TEMP}/conda.sh"
/bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch, checkout_pr_head=False) }}
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
run: |

@ -51,7 +51,6 @@ env:
IS_GHA: 1
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_RETRY_TEST_CASES: 1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
!{{ common.concurrency(build_environment) }}
@ -66,8 +65,8 @@ jobs:
steps:
!{{ common.setup_ec2_windows() }}
!{{ set_runner_specific_vars() }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch, checkout_pr_head=False) }}
- name: Populate binary env
shell: bash
run: |
@ -102,8 +101,8 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch=common.builder_branch, checkout_pr_head=False) }}
- name: Populate binary env
shell: bash
run: |

@ -106,6 +106,7 @@ jobs:
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
PR_BODY: ${{ github.event.pull_request.body }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -129,6 +130,8 @@ jobs:
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e PR_LABELS \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
@ -168,6 +171,8 @@ jobs:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
JOB_BASE_NAME: ${{ inputs.build-environment }}-test
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: ${{ github.run_id }}

@ -159,7 +159,7 @@ jobs:
cd "${GITHUB_WORKSPACE}/ios/TestApp/benchmark"
mkdir -p ../models
if [ "${USE_COREML_DELEGATE}" == 1 ]; then
pip install coremltools==5.0b5
pip install coremltools==5.0b5 protobuf==3.20.1
pip install six==1.16.0
python coreml_backend.py
else

@ -75,6 +75,7 @@ jobs:
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
JOB_BASE_NAME: ${{ inputs.build-environment }}-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
@ -123,6 +124,7 @@ jobs:
-e PR_BODY \
-e COMMIT_MESSAGES \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
@ -177,6 +179,8 @@ jobs:
SHARD_NUMBER: ${{ matrix.shard }}
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: ${{ github.run_id }}

.github/workflows/_mac-test-arm64.yml (new file, 57 lines)

@ -0,0 +1,57 @@
name: mac-test-arm64
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
jobs:
run_mps_test:
name: "Run MPS tests"
runs-on: macos12.3-m1
steps:
- name: Checkout PyTorch
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
quiet-checkout: true
- name: Clean checkout
shell: arch -arch arm64 bash {0}
run: |
git clean -fxd
- name: Download build artifacts
uses: ./.github/actions/download-build-artifacts
with:
name: ${{ inputs.build-environment }}
use-gha: true
- name: Install PyTorch
env:
ENV_NAME: conda-test-env-${{ github.run_id }}
PY_VERS: 3.8
shell: arch -arch arm64 bash {0}
run: |
# shellcheck disable=SC1090
. ~/miniconda3/etc/profile.d/conda.sh
set -ex
conda create -yp "${ENV_NAME}" "python=${PY_VERS}" numpy expecttest pyyaml
# As wheels are cross-compiled they are reported as x86_64 ones
ORIG_WHLNAME=$(ls -1 dist/*.whl); ARM_WHLNAME=${ORIG_WHLNAME/x86_64/arm64}; mv ${ORIG_WHLNAME} ${ARM_WHLNAME}
conda run -p "${ENV_NAME}" python3 -mpip install dist/*.whl
- name: Run MPS tests
env:
ENV_NAME: conda-test-env-${{ github.run_id }}
shell: arch -arch arm64 bash {0}
run: |
# shellcheck disable=SC1090
. ~/miniconda3/etc/profile.d/conda.sh
set -ex
conda run --cwd test -p "${ENV_NAME}" python3 test_mps.py -v
conda env remove -p "${ENV_NAME}"

@ -48,6 +48,7 @@ jobs:
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PR_BODY: ${{ github.event.pull_request.body }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
@ -108,6 +109,8 @@ jobs:
SHARD_NUMBER: ${{ matrix.shard }}
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: ${{ github.run_id }}

@ -74,6 +74,7 @@ jobs:
CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
JOB_BASE_NAME: ${{ inputs.build-environment }}-test
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
@ -120,6 +121,7 @@ jobs:
-e PR_BODY \
-e COMMIT_MESSAGES \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
@ -173,6 +175,8 @@ jobs:
SHARD_NUMBER: ${{ matrix.shard }}
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: ${{ github.run_id }}

@ -61,7 +61,6 @@ jobs:
MAX_JOBS: 8
CUDA_VERSION: ${{ inputs.cuda-version }}
PYTHON_VERSION: "3.8"
PYTORCH_RETRY_TEST_CASES: 1
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
SCCACHE_BUCKET: "ossci-compiler-cache"
VC_PRODUCT: "BuildTools"

@ -65,6 +65,7 @@ jobs:
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: 3.8
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
VC_PRODUCT: "BuildTools"
VC_VERSION: ""
@ -116,6 +117,8 @@ jobs:
SHARD_NUMBER: ${{ matrix.shard }}
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
TAG: ${{ steps.parse-ref.outputs.tag }}
WORKFLOW_ID: ${{ github.run_id }}

@ -31,7 +31,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
@ -77,7 +76,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -88,7 +86,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -206,7 +204,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -217,7 +214,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -413,7 +410,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -424,7 +420,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -543,7 +539,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -554,7 +549,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -762,7 +757,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -773,7 +767,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -895,7 +889,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -906,7 +899,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1114,7 +1107,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1125,7 +1117,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1247,7 +1239,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1258,7 +1249,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1465,7 +1456,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1476,7 +1466,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1594,7 +1584,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1605,7 +1594,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1801,7 +1790,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1812,7 +1800,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1931,7 +1919,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1942,7 +1929,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2150,7 +2137,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2161,7 +2147,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2283,7 +2269,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2294,7 +2279,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2502,7 +2487,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2513,7 +2497,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2635,7 +2619,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2646,7 +2629,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2853,7 +2836,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2864,7 +2846,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2982,7 +2964,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2993,7 +2974,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3189,7 +3170,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3200,7 +3180,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3319,7 +3299,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3330,7 +3309,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3538,7 +3517,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3549,7 +3527,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3671,7 +3649,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3682,7 +3659,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3890,7 +3867,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3901,7 +3877,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4023,7 +3999,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4034,7 +4009,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4241,7 +4216,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4252,7 +4226,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4370,7 +4344,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4381,7 +4354,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4577,7 +4550,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4588,7 +4560,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4707,7 +4679,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4718,7 +4689,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4926,7 +4897,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4937,7 +4907,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5059,7 +5029,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5070,7 +5039,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5278,7 +5247,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5289,7 +5257,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5411,7 +5379,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5422,7 +5389,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder

@ -27,7 +27,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
@ -74,7 +73,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -85,7 +83,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -204,7 +202,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -215,7 +212,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder

@ -31,7 +31,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
@ -78,7 +77,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -89,7 +87,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -208,7 +206,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -219,7 +216,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -416,7 +413,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -427,7 +423,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -546,7 +542,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -557,7 +552,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -754,7 +749,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -765,7 +759,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -884,7 +878,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -895,7 +888,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1092,7 +1085,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1103,7 +1095,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1222,7 +1214,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1233,7 +1224,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1431,7 +1422,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1442,7 +1432,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1562,7 +1552,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1573,7 +1562,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1783,7 +1772,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1794,7 +1782,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1914,7 +1902,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1925,7 +1912,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2135,7 +2122,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2146,7 +2132,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2266,7 +2252,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2277,7 +2262,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2487,7 +2472,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2498,7 +2482,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2618,7 +2602,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2629,7 +2612,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2839,7 +2822,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2850,7 +2832,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2973,7 +2955,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2984,7 +2965,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3194,7 +3175,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3205,7 +3185,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3328,7 +3308,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3339,7 +3318,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3549,7 +3528,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3560,7 +3538,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3683,7 +3661,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3694,7 +3671,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3904,7 +3881,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3915,7 +3891,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4038,7 +4014,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4049,7 +4024,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4259,7 +4234,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4270,7 +4244,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4393,7 +4367,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4404,7 +4377,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4614,7 +4587,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4625,7 +4597,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4748,7 +4720,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4759,7 +4730,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4969,7 +4940,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4980,7 +4950,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5103,7 +5073,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5114,7 +5083,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5324,7 +5293,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5335,7 +5303,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5458,7 +5426,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5469,7 +5436,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5679,7 +5646,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5690,7 +5656,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5826,7 +5792,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5837,7 +5802,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6029,7 +5994,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6040,7 +6004,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6176,7 +6140,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6187,7 +6150,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6379,7 +6342,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6390,7 +6352,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6526,7 +6488,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6537,7 +6498,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6729,7 +6690,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6740,7 +6700,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6876,7 +6836,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6887,7 +6846,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
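
Note: two edits recur across every binary-build job in these workflows. The explicit `ref` input is dropped from the "Checkout PyTorch" step (with this checkout action, omitting `ref` presumably falls back to the commit that triggered the run), and the "Checkout pytorch/builder" step is pinned to the release branch instead of main. After the change, each builder checkout reads roughly as follows (a sketch reconstructed from the context lines above):

      - name: Checkout pytorch/builder
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
          ref: release/1.12
          submodules: recursive
          repository: pytorch/builder
          path: builder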


@ -27,7 +27,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
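
Note: the env hunk above drops PYTORCH_RETRY_TEST_CASES (presumably the switch that re-runs failing test cases) from the workflow environment; these jobs also set SKIP_ALL_TESTS: 1, which would make a test-retry setting moot in any case. A sketch of the resulting env block, reconstructed from the visible context lines:

    env:
      PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
      PR_NUMBER: ${{ github.event.pull_request.number }}
      PYTORCH_FINAL_PACKAGE_DIR: /artifacts
      PYTORCH_ROOT: /pytorch
      SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
      SKIP_ALL_TESTS: 1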
@ -74,7 +73,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -85,7 +83,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -204,7 +202,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -215,7 +212,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder


@ -31,7 +31,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
@ -78,7 +77,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -89,7 +87,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -208,7 +206,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -219,7 +216,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -416,7 +413,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -427,7 +423,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -546,7 +542,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -557,7 +552,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -754,7 +749,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -765,7 +759,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -884,7 +878,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -895,7 +888,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1092,7 +1085,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1103,7 +1095,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1222,7 +1214,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1233,7 +1224,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1431,7 +1422,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1442,7 +1432,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1562,7 +1552,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1573,7 +1562,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1783,7 +1772,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1794,7 +1782,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1914,7 +1902,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1925,7 +1912,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2135,7 +2122,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2146,7 +2132,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2266,7 +2252,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2277,7 +2262,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2487,7 +2472,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2498,7 +2482,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2618,7 +2602,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2629,7 +2612,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2839,7 +2822,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2850,7 +2832,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2973,7 +2955,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2984,7 +2965,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3194,7 +3175,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3205,7 +3185,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3328,7 +3308,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3339,7 +3318,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3549,7 +3528,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3560,7 +3538,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3683,7 +3661,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3694,7 +3671,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3904,7 +3881,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3915,7 +3891,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4038,7 +4014,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4049,7 +4024,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4259,7 +4234,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4270,7 +4244,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4393,7 +4367,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4404,7 +4377,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4614,7 +4587,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4625,7 +4597,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4748,7 +4720,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4759,7 +4730,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4969,7 +4940,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4980,7 +4950,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5103,7 +5073,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5114,7 +5083,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5324,7 +5293,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5335,7 +5303,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5458,7 +5426,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5469,7 +5436,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5679,7 +5646,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5690,7 +5656,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5826,7 +5792,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5837,7 +5802,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6029,7 +5994,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6040,7 +6004,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6176,7 +6140,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6187,7 +6150,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6379,7 +6342,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6390,7 +6352,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6526,7 +6488,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6537,7 +6498,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6729,7 +6690,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6740,7 +6700,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6876,7 +6836,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6887,7 +6846,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder


@ -27,7 +27,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
@ -74,7 +73,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -85,7 +83,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -204,7 +202,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -215,7 +212,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder


@ -31,7 +31,6 @@ env:
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
@ -77,7 +76,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -88,7 +86,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -206,7 +204,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -217,7 +214,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -413,7 +410,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -424,7 +420,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -543,7 +539,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -554,7 +549,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -762,7 +757,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -773,7 +767,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -895,7 +889,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -906,7 +899,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1114,7 +1107,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1125,7 +1117,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1247,7 +1239,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1258,7 +1249,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1466,7 +1457,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1477,7 +1467,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1612,7 +1602,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1623,7 +1612,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1813,7 +1802,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1824,7 +1812,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1959,7 +1947,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1970,7 +1957,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2159,7 +2146,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2170,7 +2156,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2288,7 +2274,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2299,7 +2284,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2495,7 +2480,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2506,7 +2490,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2625,7 +2609,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2636,7 +2619,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2844,7 +2827,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2855,7 +2837,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2977,7 +2959,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2988,7 +2969,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3196,7 +3177,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3207,7 +3187,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3329,7 +3309,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3340,7 +3319,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3548,7 +3527,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3559,7 +3537,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3694,7 +3672,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3705,7 +3682,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3895,7 +3872,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3906,7 +3882,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4041,7 +4017,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4052,7 +4027,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4241,7 +4216,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4252,7 +4226,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4370,7 +4344,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4381,7 +4354,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4577,7 +4550,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4588,7 +4560,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4707,7 +4679,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4718,7 +4689,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -4926,7 +4897,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -4937,7 +4907,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5059,7 +5029,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5070,7 +5039,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5278,7 +5247,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5289,7 +5257,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5411,7 +5379,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5422,7 +5389,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5630,7 +5597,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5641,7 +5607,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5776,7 +5742,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5787,7 +5752,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -5977,7 +5942,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -5988,7 +5952,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6123,7 +6087,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6134,7 +6097,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6323,7 +6286,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6334,7 +6296,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6452,7 +6414,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6463,7 +6424,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6659,7 +6620,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6670,7 +6630,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -6789,7 +6749,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -6800,7 +6759,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -7008,7 +6967,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -7019,7 +6977,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -7141,7 +7099,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -7152,7 +7109,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -7360,7 +7317,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -7371,7 +7327,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -7493,7 +7449,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -7504,7 +7459,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -7712,7 +7667,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -7723,7 +7677,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -7858,7 +7812,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -7869,7 +7822,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -8059,7 +8012,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -8070,7 +8022,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -8205,7 +8157,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -8216,7 +8167,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -39,7 +39,7 @@ concurrency:
 jobs:
   conda-py3_8-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -77,7 +77,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -88,7 +87,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -214,7 +213,7 @@ jobs:
           docker system prune -af
   conda-py3_9-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -252,7 +251,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -263,7 +261,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -389,7 +387,7 @@ jobs:
           docker system prune -af
   conda-py3_10-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -427,7 +425,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -438,7 +435,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -39,7 +39,7 @@ concurrency:
 jobs:
   wheel-py3_7-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -77,7 +77,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -88,7 +87,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -214,7 +213,7 @@ jobs:
           docker system prune -af
   wheel-py3_8-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -252,7 +251,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -263,7 +261,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -389,7 +387,7 @@ jobs:
           docker system prune -af
   wheel-py3_9-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -427,7 +425,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -438,7 +435,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -564,7 +561,7 @@ jobs:
           docker system prune -af
   wheel-py3_10-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -602,7 +599,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -613,7 +609,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -37,7 +37,7 @@ concurrency:
 jobs:
   conda-py3_7-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -75,7 +75,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -86,7 +85,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -212,7 +211,7 @@ jobs:
           docker system prune -af
   conda-py3_8-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -250,7 +249,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
      - name: Clean PyTorch checkout
@@ -261,7 +259,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -387,7 +385,7 @@ jobs:
           docker system prune -af
   conda-py3_9-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -425,7 +423,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -436,7 +433,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -562,7 +559,7 @@ jobs:
           docker system prune -af
   conda-py3_10-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -600,7 +597,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -611,7 +607,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -80,7 +80,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -91,7 +90,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -261,7 +260,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -272,7 +270,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -442,7 +440,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -453,7 +450,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -623,7 +620,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -634,7 +630,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -80,7 +80,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -91,7 +90,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -261,7 +260,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -272,7 +270,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -442,7 +440,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -453,7 +450,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -623,7 +620,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -634,7 +630,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -37,7 +37,7 @@ concurrency:
 jobs:
   wheel-py3_7-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -75,7 +75,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -86,7 +85,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -212,7 +211,7 @@ jobs:
           docker system prune -af
   wheel-py3_8-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -250,7 +249,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -261,7 +259,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -387,7 +385,7 @@ jobs:
           docker system prune -af
   wheel-py3_9-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -425,7 +423,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -436,7 +433,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -562,7 +559,7 @@ jobs:
           docker system prune -af
   wheel-py3_10-cpu-build:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: macos-12
+    runs-on: macos-12-xl
     timeout-minutes: 240
     env:
       PYTORCH_ROOT: ${{ github.workspace }}/pytorch
@@ -600,7 +597,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -611,7 +607,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -28,7 +28,6 @@ env:
   IS_GHA: 1
   PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
   PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_RETRY_TEST_CASES: 1
   SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
   SKIP_ALL_TESTS: 1
 concurrency:
@@ -91,7 +90,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -102,7 +100,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -200,7 +198,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -211,7 +208,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -390,7 +387,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -401,7 +397,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -500,7 +496,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -511,7 +506,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -691,7 +686,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -702,7 +696,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -801,7 +795,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -812,7 +805,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -991,7 +984,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1002,7 +994,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1100,7 +1092,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1111,7 +1102,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1290,7 +1281,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1301,7 +1291,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1400,7 +1390,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1411,7 +1400,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1591,7 +1580,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1602,7 +1590,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1701,7 +1689,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1712,7 +1699,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1891,7 +1878,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1902,7 +1888,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2000,7 +1986,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2011,7 +1996,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2190,7 +2175,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2201,7 +2185,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2300,7 +2284,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2311,7 +2294,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2491,7 +2474,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2502,7 +2484,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2601,7 +2583,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2612,7 +2593,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2791,7 +2772,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2802,7 +2782,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2900,7 +2880,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2911,7 +2890,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3090,7 +3069,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3101,7 +3079,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3200,7 +3178,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3211,7 +3188,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3391,7 +3368,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3402,7 +3378,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3501,7 +3477,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3512,7 +3487,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -24,7 +24,6 @@ env:
   IS_GHA: 1
   PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
   PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_RETRY_TEST_CASES: 1
   SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
   SKIP_ALL_TESTS: 1
 concurrency:
@@ -91,7 +90,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -102,7 +100,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -204,7 +202,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -215,7 +212,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -28,7 +28,6 @@ env:
   IS_GHA: 1
   PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
   PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_RETRY_TEST_CASES: 1
   SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
   SKIP_ALL_TESTS: 1
 concurrency:
@@ -95,7 +94,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -106,7 +104,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -208,7 +206,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -219,7 +216,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -405,7 +402,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -416,7 +412,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -518,7 +514,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -529,7 +524,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -715,7 +710,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -726,7 +720,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -828,7 +822,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -839,7 +832,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1025,7 +1018,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1036,7 +1028,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1138,7 +1130,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1149,7 +1140,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1336,7 +1327,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1347,7 +1337,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1450,7 +1440,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1461,7 +1450,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1649,7 +1638,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1660,7 +1648,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1763,7 +1751,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1774,7 +1761,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1962,7 +1949,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1973,7 +1959,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2076,7 +2062,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2087,7 +2072,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2275,7 +2260,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2286,7 +2270,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2389,7 +2373,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2400,7 +2383,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2588,7 +2571,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2599,7 +2581,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2702,7 +2684,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2713,7 +2694,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2901,7 +2882,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2912,7 +2892,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3015,7 +2995,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3026,7 +3005,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3214,7 +3193,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3225,7 +3203,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3328,7 +3306,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3339,7 +3316,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3527,7 +3504,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3538,7 +3514,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3641,7 +3617,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3652,7 +3627,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -24,7 +24,6 @@ env:
   IS_GHA: 1
   PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
   PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_RETRY_TEST_CASES: 1
   SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
   SKIP_ALL_TESTS: 1
 concurrency:
@@ -91,7 +90,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -102,7 +100,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -204,7 +202,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -215,7 +212,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -28,7 +28,6 @@ env:
   IS_GHA: 1
   PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
   PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_RETRY_TEST_CASES: 1
   SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
   SKIP_ALL_TESTS: 1
 concurrency:
@@ -95,7 +94,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -106,7 +104,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -208,7 +206,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -219,7 +216,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -405,7 +402,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -416,7 +412,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -518,7 +514,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -529,7 +524,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -715,7 +710,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -726,7 +720,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -828,7 +822,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -839,7 +832,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1025,7 +1018,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1036,7 +1028,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1138,7 +1130,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1149,7 +1140,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1336,7 +1327,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1347,7 +1337,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1450,7 +1440,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1461,7 +1450,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1649,7 +1638,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1660,7 +1648,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1763,7 +1751,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1774,7 +1761,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -1962,7 +1949,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -1973,7 +1959,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2076,7 +2062,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2087,7 +2072,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2275,7 +2260,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2286,7 +2270,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2389,7 +2373,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2400,7 +2383,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2588,7 +2571,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2599,7 +2581,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2702,7 +2684,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2713,7 +2694,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -2901,7 +2882,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -2912,7 +2892,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3015,7 +2995,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3026,7 +3005,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3214,7 +3193,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3225,7 +3203,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3328,7 +3306,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3339,7 +3316,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3527,7 +3504,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3538,7 +3514,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -3641,7 +3617,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -3652,7 +3627,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder

@@ -24,7 +24,6 @@ env:
   IS_GHA: 1
   PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
   PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_RETRY_TEST_CASES: 1
   SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
   SKIP_ALL_TESTS: 1
 concurrency:
@@ -88,7 +87,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -99,7 +97,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder
@@ -198,7 +196,6 @@ jobs:
       - name: Checkout PyTorch
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           submodules: recursive
           path: pytorch
       - name: Clean PyTorch checkout
@@ -209,7 +206,7 @@ jobs:
       - name: Checkout pytorch/builder
         uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
         with:
-          ref: main
+          ref: release/1.12
           submodules: recursive
           repository: pytorch/builder
           path: builder


@ -28,7 +28,6 @@ env:
IS_GHA: 1
PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_RETRY_TEST_CASES: 1
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
concurrency:
@ -91,7 +90,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -102,7 +100,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -200,7 +198,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -211,7 +208,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -390,7 +387,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -401,7 +397,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -500,7 +496,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -511,7 +506,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -691,7 +686,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -702,7 +696,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -801,7 +795,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -812,7 +805,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -991,7 +984,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1002,7 +994,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1100,7 +1092,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1111,7 +1102,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1290,7 +1281,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1301,7 +1291,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1400,7 +1390,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1411,7 +1400,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1591,7 +1580,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1602,7 +1590,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1701,7 +1689,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1712,7 +1699,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -1891,7 +1878,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -1902,7 +1888,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2000,7 +1986,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2011,7 +1996,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2190,7 +2175,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2201,7 +2185,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2300,7 +2284,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2311,7 +2294,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2491,7 +2474,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2502,7 +2484,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2601,7 +2583,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2612,7 +2593,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2791,7 +2772,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2802,7 +2782,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -2900,7 +2880,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -2911,7 +2890,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3090,7 +3069,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3101,7 +3079,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3200,7 +3178,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3211,7 +3188,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3391,7 +3368,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3402,7 +3378,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder
@ -3501,7 +3477,6 @@ jobs:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
@ -3512,7 +3487,7 @@ jobs:
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
ref: release/1.12
submodules: recursive
repository: pytorch/builder
path: builder


@ -5,6 +5,9 @@ on:
- cron: 0 0 * * *
push:
tags:
# NOTE: Doc build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
- ciflow/nightly/*
workflow_dispatch:
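
The tag filter above is a GitHub Actions glob pattern, not a regex, but its shape is easy to sanity-check offline. A minimal C++ sketch, assuming an equivalent regex with escaped dots (the `is_rc_tag` helper is invented here for illustration):

```
#include <cassert>
#include <regex>
#include <string>

// Hypothetical helper mirroring the workflow's release-candidate tag filter.
// In the Actions glob the dots match literally; the regex escapes them.
static bool is_rc_tag(const std::string& tag) {
  static const std::regex rc_re(R"(v[0-9]+\.[0-9]+\.[0-9]+-rc[0-9]+)");
  return std::regex_match(tag, rc_re);
}

int main() {
  assert(is_rc_tag("v1.11.0-rc1"));  // the example from the comment above
  assert(!is_rc_tag("v1.11.0"));     // final-release tags should not trigger doc builds
  return 0;
}
```
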


@ -235,6 +235,7 @@ jobs:
build-generates-artifacts: false
pytorch-xla-linux-bionic-py3_7-clang8-build:
if: false
name: pytorch-xla-linux-bionic-py3.7-clang8
uses: ./.github/workflows/_linux-build.yml
with:


@ -56,7 +56,16 @@ jobs:
- name: Build PyTorch Android
run: |
# Install NDK 21 after GitHub update
# https://github.com/actions/virtual-environments/issues/5595
ANDROID_ROOT="/usr/local/lib/android"
ANDROID_SDK_ROOT="${ANDROID_ROOT}/sdk"
SDKMANAGER="${ANDROID_SDK_ROOT}/cmdline-tools/latest/bin/sdkmanager"
echo "y" | $SDKMANAGER "ndk;21.4.7075529"
export ANDROID_NDK="${ANDROID_SDK_ROOT}/ndk-bundle"
ln -sfn $ANDROID_SDK_ROOT/ndk/21.4.7075529 $ANDROID_NDK
echo "CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}" >> "${GITHUB_ENV}"
./scripts/build_pytorch_android.sh x86


@ -63,7 +63,7 @@ jobs:
{ config: "default", shard: 1, num_shards: 2, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 2, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "slow", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "nogpu_NO_AVX", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "nogpu_AVX512", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "nogpu_NO_AVX2", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
{ config: "jit_legacy", shard: 1, num_shards: 1, runner: "linux.4xlarge.nvidia.gpu" },
{ config: "distributed", shard: 1, num_shards: 2, runner: "linux.8xlarge.nvidia.gpu" },
@ -128,6 +128,7 @@ jobs:
]}
ios-12-5-1-x86-64:
if: false
name: ios-12-5-1-x86-64
uses: ./.github/workflows/_ios-build-test.yml
with:
@ -173,8 +174,8 @@ jobs:
build-environment: macos-11-py3-x86-64
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 2, runner: "macos-12", xcode-version: "13.3.1" },
{ config: "default", shard: 2, num_shards: 2, runner: "macos-12", xcode-version: "13.3.1" },
{ config: "default", shard: 1, num_shards: 2, runner: "macos-12" },
{ config: "default", shard: 2, num_shards: 2, runner: "macos-12" },
]}
secrets:
AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
@ -199,11 +200,18 @@ jobs:
build-environment: macos-10-15-py3-arm64
xcode-version: "13.3.1"
runner-type: macos-12
build-generates-artifacts: false
build-generates-artifacts: true
secrets:
MACOS_SCCACHE_S3_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
MACOS_SCCACHE_S3_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}
macos-12-3-py38-arm64-test:
name: macos-12.3-py3.8-arm64-test
uses: ./.github/workflows/_mac-test-arm64.yml
needs: macos-10-15-py3-arm64
with:
build-environment: macos-10-15-py3-arm64
# please ensure that this and its corresponding job in pull.yml are in sync
win-vs2019-cuda11_3-py3-build:
name: win-vs2019-cuda11.3-py3

.gitignore vendored

@ -122,6 +122,10 @@ env
.circleci/scripts/COMMIT_MSG
scripts/release_notes/*.json
# These files get copied over on invoking setup.py
torchgen/packaged/*
!torchgen/packaged/README.md
# IPython notebook checkpoints
.ipynb_checkpoints

.gitmodules vendored

@ -65,7 +65,7 @@
[submodule "third_party/python-peachpy"]
ignore = dirty
path = third_party/python-peachpy
url = https://github.com/Maratyszcza/PeachPy.git
url = https://github.com/malfet/PeachPy.git
[submodule "third_party/python-six"]
ignore = dirty
path = third_party/python-six


@ -27,6 +27,7 @@ rm -rf "$OUT"
# aten codegen
python -m torchgen.gen \
-s aten/src/ATen \
-d "$OUT"/torch/share/ATen
# torch codegen


@ -99,6 +99,6 @@ function checkout_install_torchvision() {
function clone_pytorch_xla() {
if [[ ! -d ./xla ]]; then
git clone --recursive --quiet https://github.com/pytorch/xla.git
git clone --recursive -b r1.12 https://github.com/pytorch/xla.git
fi
}


@ -33,11 +33,11 @@ fi
cross_compile_arm64() {
# Cross compilation for arm64
USE_DISTRIBUTED=1 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_NNPACK=OFF USE_QNNPACK=OFF BUILD_TEST=OFF python setup.py bdist_wheel
USE_DISTRIBUTED=1 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF BUILD_TEST=OFF python setup.py bdist_wheel
}
compile_x86_64() {
USE_DISTRIBUTED=1 USE_NNPACK=OFF python setup.py bdist_wheel
USE_DISTRIBUTED=1 python setup.py bdist_wheel
}
build_lite_interpreter() {


@ -28,4 +28,24 @@ time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
time python test/run_test.py --verbose -i distributed/test_store
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
time python test/run_test.py --verbose -i distributed/_shard/checkpoint/test_checkpoint
time python test/run_test.py --verbose -i distributed/_shard/checkpoint/test_file_system_checkpoint
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_megatron_prototype
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_chunk
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_elementwise_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_embedding_bag
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_binary_cmp
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_init
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_linear
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_math_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_matrix_ops
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/ops/test_softmax
time python test/run_test.py --verbose -i distributed/_shard/sharded_optim/test_sharded_optim
time python test/run_test.py --verbose -i distributed/_shard/test_partial_tensor
time python test/run_test.py --verbose -i distributed/_shard/test_replicated_tensor
assert_git_not_dirty


@ -152,11 +152,9 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
(cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)")
fi
if [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX-* || $TEST_CONFIG == 'nogpu_NO_AVX' ]]; then
if [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX2-* || $TEST_CONFIG == 'nogpu_NO_AVX2' ]]; then
export ATEN_CPU_CAPABILITY=default
elif [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX2-* || $TEST_CONFIG == 'nogpu_NO_AVX2' ]]; then
export ATEN_CPU_CAPABILITY=default
elif [[ "${BUILD_ENVIRONMENT}" == *-NO_AVX512-* || $TEST_CONFIG == 'nogpu_NO_AVX512' ]]; then
elif [[ "${BUILD_ENVIRONMENT}" == *-AVX512-* || $TEST_CONFIG == 'nogpu_AVX512' ]]; then
export ATEN_CPU_CAPABILITY=avx2
fi
@ -460,7 +458,7 @@ test_forward_backward_compatibility() {
python -m venv venv
# shellcheck disable=SC1091
. venv/bin/activate
pip_install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
pip_install --pre torch -f https://download.pytorch.org/whl/test/cpu/torch_test.html
pip show torch
python dump_all_function_schemas.py --filename nightly_schemas.txt
# FC: verify newmodel can be load with old code.


@ -44,6 +44,10 @@ if(DEFINED GLIBCXX_USE_CXX11_ABI)
if(${GLIBCXX_USE_CXX11_ABI} EQUAL 1)
set(CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
else()
# Please note this is required in order to ensure compatibility between gcc 9 and gcc 7
# This could be removed when all Linux PyTorch binary builds are compiled by the same toolchain again
string(APPEND CMAKE_CXX_FLAGS " -fabi-version=11")
endif()
endif()
@ -947,7 +951,8 @@ endif()
if(APPLE)
if(USE_MPS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_MPS -fno-objc-arc -weak_framework Foundation -weak_framework MetalPerformanceShaders -weak_framework MetalPerformanceShadersGraph -weak_framework Metal")
string(APPEND CMAKE_CXX_FLAGS " -DUSE_MPS -fno-objc-arc")
string(APPEND CMAKE_SHARED_LINKER_FLAGS " -weak_framework Foundation -weak_framework MetalPerformanceShaders -weak_framework MetalPerformanceShadersGraph -weak_framework Metal")
endif()
string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-private-field")
string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-braces")


@ -73,3 +73,8 @@ test/test_unary_ufuncs.py @mruberry @ngimel
test/test_binary_ufuncs.py @mruberry @ngimel
test/test_reductions.py @mruberry @ngimel
test/test_type_promotion.py @mruberry @ngimel
# torch MPS
test/test_mps.py @kulinseth
aten/src/ATen/mps/ @kulinseth
aten/src/ATen/native/mps/ @kulinseth


@ -420,7 +420,6 @@ if(USE_CUDA AND NOT USE_ROCM)
${CUDA_LIBRARIES}
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusparse_static.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static_nocallback.a
)
if(NOT BUILD_LAZY_CUDA_LINALG)


@ -21,6 +21,10 @@
#include <fbgemm/Fbgemm.h>
#endif // USE_FBGEMM
#ifdef USE_MPS
#include <ATen/mps/MPSDevice.h>
#endif
namespace at {
Context::Context() = default;
@ -225,16 +229,8 @@ bool Context::hasMKLDNN() {
}
bool Context::hasMPS() {
#if defined(__APPLE__)
#if __is_target_os(macOS)
if (__builtin_available(macOS 12.3, *) || __builtin_available(macOSApplicationExtension 12.3, *)) {
return c10::impl::hasDeviceGuardImpl(at::DeviceType::MPS);
} else {
return false;
}
#else
return false;
#endif
#if USE_MPS
return at::mps::is_available();
#else
return false;
#endif


@ -26,6 +26,13 @@ constexpr uint64_t storage_max() {
return std::min(int64_max, size_max);
}
inline void raise_warning_for_complex_half(ScalarType dtype) {
if (dtype == kComplexHalf) {
TORCH_WARN_ONCE(
"ComplexHalf support is experimental and many operators don't support it yet.");
}
}
} // namespace (anonymous)
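
`TORCH_WARN_ONCE` above emits the ComplexHalf warning at most once per process rather than on every allocation. A minimal sketch of the same warn-once idea in plain C++, using only the standard library (the `WARN_ONCE_SKETCH` macro is made up for illustration):

```
#include <iostream>

// Each expansion gets its own function-local flag, so the message prints at
// most once per call site; C++11 guarantees thread-safe initialization of
// function-local statics.
#define WARN_ONCE_SKETCH(msg)                          \
  do {                                                 \
    static const bool warned_ = [] {                   \
      std::cerr << "Warning: " << (msg) << '\n';       \
      return true;                                     \
    }();                                               \
    (void)warned_;                                     \
  } while (0)

void allocate_complex_half_storage() {
  WARN_ONCE_SKETCH("ComplexHalf support is experimental and many operators don't support it yet.");
}

int main() {
  for (int i = 0; i < 3; ++i) {
    allocate_complex_half_storage();  // warns only on the first iteration
  }
  return 0;
}
```
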
size_t computeStorageNbytesContiguous(
@ -98,7 +105,7 @@ TensorBase empty_generic(
ScalarType scalar_type,
c10::optional<c10::MemoryFormat> memory_format_opt) {
at::detail::check_size_nonnegative(size);
at::detail::raise_warning_for_complex_half(scalar_type);
caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
size_t size_bytes = computeStorageNbytesContiguous(size, dtype.itemsize());
auto storage_impl = c10::make_intrusive<StorageImpl>(
@ -132,7 +139,7 @@ TensorBase empty_strided_generic(
c10::DispatchKeySet ks,
ScalarType scalar_type) {
at::detail::check_size_nonnegative(size);
at::detail::raise_warning_for_complex_half(scalar_type);
caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
size_t size_bytes = computeStorageNbytes(size, stride, dtype.itemsize());
auto storage_impl = c10::make_intrusive<StorageImpl>(


@ -353,8 +353,8 @@ static inline void copy_to(const Tensor& dst, const Tensor& src) {
// appear. Users can workaround that case by dst[index..] = src.reshape(..)
dst.copy_(src);
return;
} else if (src.sizes().size() == 0 && src.device().type() == at::kCPU) {
dst.fill_(src.item());
} else if (src.dim() == 0 && src.device().type() == at::kCPU) {
dst.fill_(src);
return;
}
auto src_view = src.view(slicePrefix1sSize(src.sizes()));
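
The hunk above replaces `dst.fill_(src.item())` with `dst.fill_(src)` for zero-dim CPU sources (and `src.sizes().size()` with the equivalent `src.dim()`). A short libtorch sketch of the behavior, assuming a standard libtorch install; `fill_` has an overload that accepts a zero-dim tensor, which avoids extracting a C++ scalar through `item()` and keeps `src` in the autograd graph:

```
#include <torch/torch.h>
#include <iostream>

int main() {
  torch::Tensor dst = torch::zeros({2, 3});
  torch::Tensor src = torch::scalar_tensor(7.0);  // zero-dim: src.dim() == 0

  dst.fill_(src);            // tensor overload, no host scalar round-trip
  std::cout << dst << '\n';  // a 2x3 tensor of sevens
  return 0;
}
```
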


@ -253,7 +253,7 @@ TensorTypePtr TensorType::create(const at::Tensor& t) {
VaryingShape<size_t> stride_indices;
VaryingShape<int64_t> strides;
VaryingShape<int64_t> sizes;
if (t.layout() == at::kStrided) {
if (t.layout() == at::kStrided && !t.is_nested()) {
sizes = VaryingShape<int64_t>{t.sizes().vec()};
strides = VaryingShape<int64_t>{t.strides().vec()};
return TensorType::create(


@ -12,6 +12,8 @@
#define MPS_ERROR_RUNTIME_TOO_LOW \
"The MPS backend is supported on MacOS 12.3+.", \
"Current OS version can be queried using `sw_vers`"
#define MPS_ERROR_DOUBLE_NOT_SUPPORTED "Cannot convert a MPS Tensor to float64 dtype " \
"as the MPS framework doesn't support float64. Please use float32 instead."
namespace at { namespace detail {
TensorBase empty_mps(
@ -23,7 +25,7 @@ TensorBase empty_mps(
c10::optional<c10::MemoryFormat> memory_format_opt) {
#if defined(__APPLE__)
#if __is_target_os(macOS)
if (__builtin_available(macOS 12.3, *) || __builtin_available(macOSApplicationExtension 12.3, *)) {
if (at::hasMPS()) {
auto device = device_or_default(device_opt);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device.type() == DeviceType::MPS);
@ -35,6 +37,8 @@ TensorBase empty_mps(
auto* allocator = at::mps::GetMPSAllocator();
int64_t nelements = c10::multiply_integers(size);
auto dtype = dtype_or_default(dtype_opt);
TORCH_CHECK_TYPE(dtype != ScalarType::Double, MPS_ERROR_DOUBLE_NOT_SUPPORTED);
auto dtype_meta = scalarTypeToTypeMeta(dtype);
int64_t size_bytes = nelements * dtype_meta.itemsize();
auto storage_impl = c10::make_intrusive<StorageImpl>(
@ -83,9 +87,10 @@ TensorBase empty_strided_mps(
c10::optional<Device> device_opt) {
#if defined(__APPLE__)
#if __is_target_os(macOS)
if (__builtin_available(macOS 12.3, *) || __builtin_available(macOSApplicationExtension 12.3, *)) {
if (at::hasMPS()) {
auto device = device_or_default(device_opt);
TORCH_INTERNAL_ASSERT(device.is_mps());
TORCH_CHECK_TYPE(dtype != ScalarType::Double, MPS_ERROR_DOUBLE_NOT_SUPPORTED);
const DeviceGuard device_guard(device);
auto* allocator = at::mps::GetMPSAllocator();
constexpr c10::DispatchKeySet mps_dks(c10::DispatchKey::MPS);


@ -26,6 +26,23 @@
namespace at {
namespace mps {
class IMpsAllocatorCallback {
public:
enum class EventType {
ALLOCATED, // buffer got allocated to be used immediately
RECYCLED, // buffer pulled from free list to be reused
FREED, // buffer put to free list for future recycling
RELEASED, // buffer memory released
};
virtual ~IMpsAllocatorCallback() = default;
virtual void executeMPSAllocatorCallback(void* ptr, EventType event) = 0;
};
// MPS allocator will execute every registered callback when a block of memory is freed.
C10_DECLARE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback);
#define REGISTER_MPS_ALLOCATOR_CALLBACK(name, ...) \
C10_REGISTER_CLASS(MPSAllocatorCallbacksRegistry, name, __VA_ARGS__);
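
The declare/register macro pair above is c10's registry pattern: the allocator notifies observers it never sees at compile time. A standalone sketch of the same idea without the c10 macros (all names here are invented):

```
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

enum class EventType { ALLOCATED, RECYCLED, FREED, RELEASED };

struct IAllocatorCallback {
  virtual ~IAllocatorCallback() = default;
  virtual void onEvent(void* ptr, EventType event) = 0;
};

// Process-wide name -> factory map, standing in for C10_DECLARE_REGISTRY.
using Factory = std::function<std::unique_ptr<IAllocatorCallback>()>;
std::unordered_map<std::string, Factory>& registry() {
  static std::unordered_map<std::string, Factory> r;
  return r;
}

struct LoggingCallback : IAllocatorCallback {
  void onEvent(void* ptr, EventType) override {
    std::cout << "allocator event on buffer " << ptr << '\n';
  }
};

// Registration by side effect, as REGISTER_MPS_ALLOCATOR_CALLBACK would do.
const bool registered = [] {
  registry().emplace("logging", [] {
    return std::unique_ptr<IAllocatorCallback>(new LoggingCallback());
  });
  return true;
}();

// What trigger_memory_callbacks does: instantiate and notify every entry.
void trigger(void* ptr, EventType event) {
  for (auto& kv : registry()) kv.second()->onEvent(ptr, event);
}

int main() {
  int dummy = 0;
  trigger(&dummy, EventType::FREED);
  return 0;
}
```
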
namespace HeapAllocator {
#define MB(x) round_page(x * 1048576UL)
@ -46,13 +63,18 @@ struct HeapBlock;
struct BufferBlock
{
id<MTLBuffer> buffer;
size_t size;
size_t size; // size after alignment
size_t requested_size; // requested size (before alignment)
// buffer shape is used for retrieving base of views in cached graphs
std::vector<int64_t> shape;
bool in_use;
HeapBlock* heap;
id_t buf_id;
BufferBlock(size_t Size, const id<MTLBuffer> Buffer = nullptr, HeapBlock* Heap = nullptr, id_t BufID = 0) :
buffer(Buffer), size(Size), in_use(false), heap(Heap), buf_id(BufID) { }
BufferBlock(size_t Size, size_t RequestedSize = 0, const id<MTLBuffer> Buffer = nullptr,
HeapBlock* Heap = nullptr, id_t BufID = 0) :
buffer(Buffer), size(Size), requested_size(RequestedSize),
in_use(false), heap(Heap), buf_id(BufID) { }
static bool Comparator(const BufferBlock* a, const BufferBlock* b) {
return (a->size != b->size) ? a->size < b->size : (uintptr_t)a->buffer < (uintptr_t)b->buffer;
@ -98,7 +120,7 @@ struct HeapBlock
d.type = MTLHeapTypeAutomatic;
heap = [device newHeapWithDescriptor: d];
if (heap) {
[heap setPurgeableState:MTLPurgeableStateEmpty];
[heap setPurgeableState:MTLPurgeableStateNonVolatile];
}
[d release];
}
@ -176,6 +198,9 @@ public:
void Free(void* ptr);
void EmptyCache();
bool isSharedBuffer(void* ptr);
ssize_t getRequestedBufferSize(void* ptr);
void setBufferShape(void* ptr, const IntArrayRef& shape);
IntArrayRef getBufferShape(void* ptr);
inline id<MTLDevice> Device() const { return m_device; }
void enable_debug_info() { m_enable_debug_info = true; }
@ -209,6 +234,7 @@ private:
void release_buffers(BufferPool& pool);
bool release_available_cached_buffers(const AllocParams& p);
bool release_cached_buffers();
void trigger_memory_callbacks(BufferBlock* buffer_block, IMpsAllocatorCallback::EventType event);
BufferPool& get_pool(size_t Size, bool useShared) {
return Size <= kMaxSmallAlloc ? (useShared ? m_small_pool_shared : m_small_pool_private) :


@ -8,6 +8,8 @@
namespace at {
namespace mps {
C10_DEFINE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback);
namespace HeapAllocator {
HeapBlock* MPSHeapAllocatorImpl::get_free_heap(AllocParams& p)
@ -56,7 +58,7 @@ bool MPSHeapAllocatorImpl::alloc_buffer(AllocParams& p)
TORCH_INTERNAL_ASSERT(buffer);
// insert heap after a buffer was created on it to update the order of heap's set
p.pool->heaps.insert(heap);
p.buffer_block = new BufferBlock(p.size(), buffer, heap, m_allocated_buffers.size() + 1);
p.buffer_block = new BufferBlock(p.size(), p.requested_size, buffer, heap, m_allocated_buffers.size() + 1);
m_allocated_buffers[p.buffer_block->buffer] = p.buffer_block;
m_total_allocated_memory += p.size();
@ -64,7 +66,8 @@ bool MPSHeapAllocatorImpl::alloc_buffer(AllocParams& p)
std::cerr << "Allocated "
<< (p.pool->is_shared ? "shared" : "private")
<< " buffer #" << p.buffer_block->buf_id
<< " with aligned size " << format_size(p.size())
<< " of size " << format_size(p.size())
<< " at " << p.buffer_block->buffer
<< " (requested size: " << format_size(p.requested_size)
<< ", heap size: " << format_size(heap->size.available)
<< ", total allocated: " << format_size(m_total_allocated_memory) << ")\n";
@ -90,7 +93,8 @@ bool MPSHeapAllocatorImpl::get_free_buffer(AllocParams& p)
std::cerr << "Reusing "
<< (p.pool->is_shared ? "shared" : "private")
<< " buffer #" << p.buffer_block->buf_id
<< " with aligned size " << format_size(p.buffer_block->size)
<< " of size " << format_size(p.buffer_block->size)
<< " at " << p.buffer_block->buffer
<< " (requested size: " << format_size(p.requested_size) << ")\n";
}
return true;
@ -101,7 +105,6 @@ id<MTLBuffer> MPSHeapAllocatorImpl::Malloc(size_t size, bool sharedStorage)
TORCH_CHECK(size < m_max_buffer_size, "Invalid buffer size: ", format_size(size));
std::lock_guard<std::mutex> lock(m_mutex);
__block id<MTLBuffer> buf = nil;
size_t alloc_size = get_allocation_size(size, sharedStorage);
auto& pool = get_pool(alloc_size, sharedStorage);
@ -126,7 +129,9 @@ id<MTLBuffer> MPSHeapAllocatorImpl::Malloc(size_t size, bool sharedStorage)
void MPSHeapAllocatorImpl::free_buffer(BufferBlock* buffer_block)
{
TORCH_INTERNAL_ASSERT(buffer_block->in_use);
trigger_memory_callbacks(buffer_block, IMpsAllocatorCallback::EventType::FREED);
buffer_block->in_use = false;
buffer_block->shape.clear(); // reset shape
BufferPool *pool = buffer_block->heap->pool;
// Makes sure the BufferBlock* isn't already present in the pool we're freeing it back into.
TORCH_INTERNAL_ASSERT(pool->buffers.insert(buffer_block).second);
@ -134,14 +139,19 @@ void MPSHeapAllocatorImpl::free_buffer(BufferBlock* buffer_block)
BufferBlock* MPSHeapAllocatorImpl::get_allocated_buffer_block(void* ptr)
{
id<MTLBuffer> buf = __builtin_bit_cast(id<MTLBuffer>, ptr);
auto it = m_allocated_buffers.find(buf);
auto it = m_allocated_buffers.find(ptr);
if (it == m_allocated_buffers.end())
return nullptr;
return it->second;
}
void MPSHeapAllocatorImpl::trigger_memory_callbacks(BufferBlock* buffer_block, IMpsAllocatorCallback::EventType event) {
for (const auto& name : MPSAllocatorCallbacksRegistry()->Keys()) {
MPSAllocatorCallbacksRegistry()->Create(name)->executeMPSAllocatorCallback(buffer_block->buffer, event);
}
}
bool MPSHeapAllocatorImpl::isSharedBuffer(void* ptr)
{
std::lock_guard<std::mutex> lock(m_mutex);
@ -151,6 +161,40 @@ bool MPSHeapAllocatorImpl::isSharedBuffer(void* ptr)
return buffer_block && buffer_block->heap->pool->is_shared;
}
ssize_t MPSHeapAllocatorImpl::getRequestedBufferSize(void* ptr)
{
std::lock_guard<std::mutex> lock(m_mutex);
BufferBlock *buffer_block = get_allocated_buffer_block(ptr);
if (buffer_block)
return (ssize_t) buffer_block->requested_size;
// this indicates the passed buffer pointer wasn't found
return -1;
}
void MPSHeapAllocatorImpl::setBufferShape(void* ptr, const IntArrayRef& shape)
{
std::lock_guard<std::mutex> lock(m_mutex);
BufferBlock *buffer_block = get_allocated_buffer_block(ptr);
TORCH_INTERNAL_ASSERT(buffer_block, "failed to find the buffer ", ptr);
// note that the IntArrayRef doesn't own the underlying data, and the backing
// memory for shape data must persist as long as the buffer is in use.
// So we need to copy to vector.
buffer_block->shape = shape.vec();
}
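
The comment in `setBufferShape` is the crux: `IntArrayRef` is a non-owning view, so storing one whose backing memory can disappear is a use-after-free. A short sketch of the hazard with C++20 `std::span` standing in for `IntArrayRef` (the `BufferBlockSketch` type is invented):

```
#include <cstdint>
#include <span>
#include <vector>

struct BufferBlockSketch {
  std::vector<int64_t> shape;  // owning copy, as buffer_block->shape = shape.vec() above

  void setShape(std::span<const int64_t> view) {
    // Copy the viewed elements; storing `view` itself would dangle once the
    // caller's backing storage is destroyed.
    shape.assign(view.begin(), view.end());
  }

  std::span<const int64_t> getShape() const {
    // Handing out a view of our own member is fine: the vector outlives the
    // call, matching getBufferShape() returning IntArrayRef{shape}.
    return shape;
  }
};

int main() {
  BufferBlockSketch block;
  {
    std::vector<int64_t> tmp{2, 3, 4};
    block.setShape(tmp);
  }  // tmp is gone; block.shape stays valid because it owns a copy
  return block.getShape().size() == 3 ? 0 : 1;
}
```
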
IntArrayRef MPSHeapAllocatorImpl::getBufferShape(void* ptr)
{
std::lock_guard<std::mutex> lock(m_mutex);
BufferBlock *buffer_block = get_allocated_buffer_block(ptr);
if (buffer_block && buffer_block->shape.size() > 0)
return IntArrayRef{buffer_block->shape};
return IntArrayRef();
}
void MPSHeapAllocatorImpl::Free(void* ptr)
{
std::lock_guard<std::mutex> lock(m_mutex);
@ -168,6 +212,8 @@ void MPSHeapAllocatorImpl::EmptyCache()
void MPSHeapAllocatorImpl::release_buffer(BufferBlock* buffer_block, bool remove_empty_heap)
{
trigger_memory_callbacks(buffer_block, IMpsAllocatorCallback::EventType::RELEASED);
HeapBlock *heap = buffer_block->heap;
BufferPool *pool = heap->pool;
m_total_allocated_memory -= buffer_block->size;
@ -252,39 +298,44 @@ bool MPSHeapAllocatorImpl::release_cached_buffers()
} // namespace HeapAllocator
// Use "at::mps::GetMPSAllocator()" to acquire a handle to MPS Allocator
static HeapAllocator::MPSHeapAllocatorImpl s_allocatorImpl;
namespace {
HeapAllocator::MPSHeapAllocatorImpl& _getAllocImpl() {
static HeapAllocator::MPSHeapAllocatorImpl s_allocatorImpl;
return s_allocatorImpl;
}
}
// MPS allocator struct to be registered with Pytorch
struct TORCH_API MPSAllocator final : public at::Allocator {
public:
explicit MPSAllocator(bool useSharedStorage) :
m_has_unified_memory(s_allocatorImpl.Device().hasUnifiedMemory), m_use_shared_storage(useSharedStorage)
m_has_unified_memory(_getAllocImpl().Device().hasUnifiedMemory), m_use_shared_storage(useSharedStorage)
{
const bool enable_debug_info = isEnvVarEnabled("PYTORCH_DEBUG_MPS_ALLOCATOR");
if (enable_debug_info) {
s_allocatorImpl.enable_debug_info();
_getAllocImpl().enable_debug_info();
if (!m_use_shared_storage || m_has_unified_memory) {
std::cerr << "Initializing "
<< (useSharedStorage ? "shared" : "private")
<< " heap allocator on "
<< (m_has_unified_memory ? "unified" : "discrete")
<< " device memory of size "
<< s_allocatorImpl.Device().recommendedMaxWorkingSetSize / 1048576UL << " MB\n";
<< _getAllocImpl().Device().recommendedMaxWorkingSetSize / 1048576UL << " MB\n";
}
}
}
~MPSAllocator() override {
s_allocatorImpl.EmptyCache();
_getAllocImpl().EmptyCache();
}
DataPtr allocate(const size_t nbytes) const override {
__block id<MTLBuffer> buf = nbytes > 0 ? s_allocatorImpl.Malloc(nbytes, m_use_shared_storage) : nullptr;
__block id<MTLBuffer> buf = nbytes > 0 ? _getAllocImpl().Malloc(nbytes, m_use_shared_storage) : nullptr;
return { buf, buf, &Delete, at::Device(at::DeviceType::MPS, 0)};
}
DeleterFnPtr raw_deleter() const override { return &Delete; }
bool is_shared(void* ptr) const { return s_allocatorImpl.isSharedBuffer(ptr); }
bool is_shared(void* ptr) const { return _getAllocImpl().isSharedBuffer(ptr); }
bool is_shared_storge_supported() const { return m_has_unified_memory; }
private:
@ -292,7 +343,11 @@ private:
// use shared buffers on unified memory
bool m_use_shared_storage;
static void Delete(void* ptr) { if (ptr) s_allocatorImpl.Free(ptr); }
static void Delete(void* ptr) {
if (ptr) {
_getAllocImpl().Free(ptr);
}
}
static bool isEnvVarEnabled(const char *envvar) {
const char *e = getenv(envvar);
@ -305,15 +360,45 @@ private:
}
};
static MPSAllocator s_mps_shared_alloc(true);
namespace {
MPSAllocator& _getSharedAllocator() {
static MPSAllocator s_mps_shared_alloc(true);
return s_mps_shared_alloc;
}
MPSAllocator& _getPrivateAllocator() {
static mps::MPSAllocator s_mps_private_alloc(false);
return s_mps_private_alloc;
}
} // anonymous namespace
at::Allocator* getMPSSharedAllocator()
{
if (s_mps_shared_alloc.is_shared_storge_supported())
return &s_mps_shared_alloc;
auto& sa = _getSharedAllocator();
if (sa.is_shared_storge_supported()) {
return &sa;
}
return nullptr;
}
at::Allocator* getMPSStaticAllocator() {
return &_getPrivateAllocator();
}
// TODO: create MPSHooks interface and move these there.
ssize_t get_requested_buffer_size(void* ptr) {
return _getAllocImpl().getRequestedBufferSize(ptr);
}
void set_buffer_shape(void* ptr, const IntArrayRef& shape) {
_getAllocImpl().setBufferShape(ptr, shape);
}
IntArrayRef get_buffer_shape(void* ptr) {
return _getAllocImpl().getBufferShape(ptr);
};
} // namespace mps
namespace native {
@ -325,7 +410,7 @@ namespace native {
bool is_pinned_mps(const Tensor& self, c10::optional<Device> device)
{
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!device.has_value() || device->is_mps());
return at::mps::s_mps_shared_alloc.is_shared(self.storage().data());
return at::mps::_getSharedAllocator().is_shared(self.storage().data());
}
// torch.pin_memory() implementation
@ -344,8 +429,4 @@ Tensor _pin_memory_mps(const Tensor& self, c10::optional<Device> device)
}
} // namespace native
static mps::MPSAllocator s_mps_private_alloc(false);
REGISTER_ALLOCATOR(DeviceType::MPS, &s_mps_private_alloc);
} // namespace at
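
A recurring theme in this file is replacing file-scope statics (`s_allocatorImpl`, `s_mps_shared_alloc`, `s_mps_private_alloc`) with function-local statics behind accessors like `_getAllocImpl()`. The usual motivation is the static initialization order problem: construction order across translation units is unspecified, whereas a function-local static is constructed on first use. A minimal sketch:

```
#include <iostream>

struct AllocatorImpl {
  AllocatorImpl() { std::cout << "AllocatorImpl constructed\n"; }
  void malloc_stub() { std::cout << "allocating\n"; }
};

// Construction happens on the first call, not at some unspecified point
// during this translation unit's static initialization; C++11 also makes
// the initialization thread-safe.
AllocatorImpl& getAllocImpl() {
  static AllocatorImpl impl;
  return impl;
}

int main() {
  std::cout << "main started\n";  // printed before the constructor runs
  getAllocImpl().malloc_stub();   // first use triggers construction
  getAllocImpl().malloc_stub();   // same instance, constructed once
  return 0;
}
```
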


@ -56,6 +56,8 @@ class TORCH_API MPSDevice {
MPSDevice();
};
TORCH_API bool is_available();
at::Allocator* GetMPSAllocator(bool useSharedAllocator = false);
} // namespace mps


@ -20,12 +20,26 @@ MPSDevice::~MPSDevice() {
_mtl_device = nil;
}
MPSDevice::MPSDevice() {
NSArray* devices = MTLCopyAllDevices();
MPSDevice::MPSDevice(): _mtl_device(nil) {
// Check that MacOS 12.3+ version of MPS framework is available
// Create the MPSGraph and check method introduced in 12.3+
// which is used by MPS backend.
id mpsCD = NSClassFromString(@"MPSGraph");
if ([mpsCD instancesRespondToSelector:@selector(LSTMWithSourceTensor:
recurrentWeight:
inputWeight:
bias:
initState:
initCell:
descriptor:
name:)] == NO) {
return;
}
NSArray* devices = [MTLCopyAllDevices() autorelease];
for (unsigned long i = 0 ; i < [devices count] ; i++) {
id<MTLDevice> device = devices[i];
if(![device isLowPower]) { // exclude Intel GPUs
_mtl_device = device;
_mtl_device = [device retain];
break;
}
}
@ -33,8 +47,13 @@ MPSDevice::MPSDevice() {
}
at::Allocator* getMPSSharedAllocator();
at::Allocator* getMPSStaticAllocator();
at::Allocator* GetMPSAllocator(bool useSharedAllocator) {
return useSharedAllocator ? getMPSSharedAllocator() : GetAllocator(DeviceType::MPS);
return useSharedAllocator ? getMPSSharedAllocator() : getMPSStaticAllocator();
}
bool is_available() {
return MPSDevice::getInstance()->device() != nil;
}
} // namespace mps
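
`is_available()` now reduces to whether a usable Metal device was found, and the device lookup is gated on the MPSGraph class responding to an LSTM selector that shipped with the macOS 12.3 SDK. The same check can be written against the plain C Objective-C runtime API; a macOS-only sketch, assuming the MetalPerformanceShadersGraph framework is linked so the class can be found (`class_respondsToSelector(cls, sel)` is the C equivalent of `[cls instancesRespondToSelector:sel]`):

```
#include <objc/runtime.h>
#include <cstdio>

int main() {
  Class mpsGraph = objc_getClass("MPSGraph");
  SEL lstm = sel_registerName(
      "LSTMWithSourceTensor:recurrentWeight:inputWeight:bias:"
      "initState:initCell:descriptor:name:");
  bool available = mpsGraph != nullptr &&
                   class_respondsToSelector(mpsGraph, lstm);
  std::printf("MPS 12.3+ runtime present: %s\n", available ? "yes" : "no");
  return 0;
}
```
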


@ -39,7 +39,9 @@ static CPUCapability compute_cpu_capability() {
#if !defined(__powerpc__) && !defined(__s390x__)
if (cpuinfo_initialize()) {
#ifdef HAVE_AVX512_CPU_DEFINITION
// AVX512 can be slower than AVX2, so let's keep it as opt-in
// see https://github.com/pytorch/pytorch/issues/80252
#if defined(HAVE_AVX512_CPU_DEFINITION) && false
// GCC supports some AVX512 intrinsics such as _mm512_set_epi16 only in
// versions 9 & beyond. So, we want to ensure that only releases built with
// supported compilers on supported hardware return CPU Capability AVX512,
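
With AVX512 detection compiled out via `&& false`, dispatch tops out at AVX2 unless explicitly overridden. The opt-in escape hatch in ATen is the `ATEN_CPU_CAPABILITY` environment variable, which `compute_cpu_capability` consults before hardware detection; a reduced sketch of that env-driven selection (enum and defaults simplified, detection omitted):

```
#include <cstdlib>
#include <cstring>
#include <iostream>

enum class CPUCapability { DEFAULT, AVX2, AVX512 };

CPUCapability compute_cpu_capability_sketch() {
  // An explicit request wins over hardware detection.
  if (const char* envar = std::getenv("ATEN_CPU_CAPABILITY")) {
    if (std::strcmp(envar, "avx512") == 0)  return CPUCapability::AVX512;
    if (std::strcmp(envar, "avx2") == 0)    return CPUCapability::AVX2;
    if (std::strcmp(envar, "default") == 0) return CPUCapability::DEFAULT;
  }
  return CPUCapability::AVX2;  // stand-in for cpuinfo-based detection
}

int main() {
  std::cout << static_cast<int>(compute_cpu_capability_sketch()) << '\n';
  return 0;
}
```
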


@ -82,7 +82,10 @@ Tensor _weight_norm
auto v = v_in.contiguous();
auto g = g_in.contiguous();
bool can_use_fused = (dim == 0) || (dim == v.dim() - 1);
auto has_half_dtype = v.scalar_type() == at::ScalarType::Half
|| g.scalar_type() == at::ScalarType::Half;
bool can_use_fused = !has_half_dtype && ((dim == 0) || (dim == v.dim() - 1));
if (can_use_fused) {
// weight_norm does not have a derivative defined for it, so this will route back through


@ -328,8 +328,14 @@ void weight_norm_backward_last_dim_kernel(
auto grad_v_data = grad_v.data_ptr<scalar_t>();
auto grad_g_data = grad_g.data_ptr<scalar_t>();
// the temp buffer will be used twice:
// 1. vertical reduction from [M, N] to [T, N]
// 2. store the intermediate data of `sum`, `a` and `b`,
// so need to make sure it has at least 3 rows
//
int num_threads = at::get_num_threads();
Tensor buffer = at::empty({num_threads, N}, saved_norm.options()).zero_();
int K = std::max(3, num_threads);
Tensor buffer = at::empty({K, N}, saved_norm.options()).zero_();
auto buffer_data = buffer.data_ptr<accscalar_t>();
// vertical parallel reduction
@ -351,6 +357,9 @@ void weight_norm_backward_last_dim_kernel(
buffer_data[j] = sum;
}
// reuse the 1st row of buffer to store the sum
// 2nd row to store coefficient a
// 3rd row to store coefficient b
accscalar_t* per_dim_sum = buffer_data;
accscalar_t* a = buffer_data + N;
accscalar_t* b = buffer_data + 2 * N;
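
The sizing comment above is the whole fix: the buffer serves first as a `[num_threads, N]` reduction scratchpad and is then reinterpreted as three named rows (`sum`, `a`, `b`), so with fewer than three threads it used to be under-allocated. A sketch of the two-phase reuse with plain vectors (shapes and math elided):

```
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  const int num_threads = 2;  // fewer than 3 threads used to under-allocate
  const int N = 4;

  // Phase 1 needs num_threads rows; phase 2 needs 3 rows. Take the max.
  const int K = std::max(3, num_threads);
  std::vector<float> buffer(static_cast<size_t>(K) * N, 0.f);

  // Phase 2: rows 0..2 become named intermediates, as in the kernel.
  float* per_dim_sum = buffer.data();
  float* a = buffer.data() + N;
  float* b = buffer.data() + 2 * N;  // row 2 exists only because K >= 3
  assert(b + N <= buffer.data() + buffer.size());
  (void)per_dim_sum; (void)a;
  return 0;
}
```
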


@ -123,12 +123,14 @@ __global__ void softmax_warp_forward(output_t *dst, const input_t *src, int batc
for (int it = 0; it < WARP_ITERATIONS; ++it) {
if (is_masked) {
int idx = it*WARP_SIZE;
if (!is_transformer_mask) {
idx += i*element_count;
}
if (!mask[idx]) {
max_value[i] = (is_meaningful_max && max_value[i] > elements[i][it]) ? max_value[i] : elements[i][it];
is_meaningful_max = true;
if ((idx + local_idx) < element_count) {
if (!is_transformer_mask) {
idx += i*element_count;
}
if (!mask[idx]) {
max_value[i] = (is_meaningful_max && max_value[i] > elements[i][it]) ? max_value[i] : elements[i][it];
is_meaningful_max = true;
}
}
} else {
max_value[i] = max_value[i] > elements[i][it] ? max_value[i] : elements[i][it];
@ -156,22 +158,28 @@ __global__ void softmax_warp_forward(output_t *dst, const input_t *src, int batc
}
} else {
int idx = it*WARP_SIZE;
bool valid = (idx + local_idx) < element_count;
if (!is_transformer_mask) {
idx += i*element_count;
}
if (!mask[idx]) {
if (is_log_softmax) {
sum[i] += std::exp(elements[i][it] - max_value[i]);
if (valid) {
if (!mask[idx]) {
if (is_log_softmax) {
sum[i] += std::exp(elements[i][it] - max_value[i]);
} else {
elements[i][it] = std::exp(elements[i][it] - max_value[i]);
sum[i] += elements[i][it];
}
} else {
elements[i][it] = std::exp(elements[i][it] - max_value[i]);
sum[i] += elements[i][it];
if (!is_log_softmax) {
// Masked values are treated as -infinity, and std::exp(-infinity) is 0.
elements[i][it] = 0;
}
}
} else {
if (!is_log_softmax) {
// Masked values are treated as -infinity, and std::exp(-infinity) is 0.
elements[i][it] = 0;
}
if (!is_log_softmax) {
elements[i][it] = 0.;
}
}
}
}
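
The kernel change guards every mask access with `(idx + local_idx) < element_count`, so lanes whose iteration lands past the end of the row no longer read the mask out of bounds. Semantically, masked positions behave like `-inf` inputs: they are skipped in the running max and contribute `exp(-inf) == 0` to the sum. A scalar C++ reference of that masked-softmax semantics (no warp tiling, just the math; assumes at least one unmasked element per row):

```
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <limits>
#include <vector>

// mask[i] == true means position i is masked out.
std::vector<float> masked_softmax(const std::vector<float>& x,
                                  const std::vector<bool>& mask) {
  float max_v = -std::numeric_limits<float>::infinity();
  for (size_t i = 0; i < x.size(); ++i)
    if (!mask[i]) max_v = std::max(max_v, x[i]);  // max over unmasked only

  std::vector<float> out(x.size(), 0.f);
  float sum = 0.f;
  for (size_t i = 0; i < x.size(); ++i) {
    if (!mask[i]) {
      out[i] = std::exp(x[i] - max_v);
      sum += out[i];
    }  // masked: stays 0, mirroring `elements[i][it] = 0` in the kernel
  }
  for (float& v : out) v /= sum;
  return out;
}

int main() {
  auto y = masked_softmax({1.f, 2.f, 3.f}, {false, true, false});
  std::printf("%f %f %f\n", y[0], y[1], y[2]);  // middle entry is exactly 0
  return 0;
}
```
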


@ -63,6 +63,7 @@ namespace at { namespace native {
std::ostream& operator<<(std::ostream & out, const ConvolutionParams& params) {
out << "ConvolutionParams \n"
<< " memory_format = " << params.memory_format << "\n"
<< " data_type = " << cudnnTypeToString(params.dataType) << "\n"
<< " padding = " << ArrayRef<int>{params.padding} << "\n"
<< " stride = " << ArrayRef<int>{params.stride} << "\n"
@ -83,7 +84,7 @@ void setConvolutionParams(
ConvolutionParams* params,
const at::Tensor& input, const at::Tensor& weight,
IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation,
int64_t groups, bool deterministic, bool allow_tf32) {
int64_t groups, bool deterministic, bool allow_tf32, at::MemoryFormat memory_format) {
cudnnDataType_t dataType = getCudnnDataType(input);
memset(params, 0, sizeof(ConvolutionParams));
@ -91,7 +92,7 @@ void setConvolutionParams(
params->dataType = dataType;
// ASSERT(weight.dim() == input.dim())
params->input_dim = input.dim();
params->memory_format = input.suggest_memory_format();
params->memory_format = memory_format;
for (int i = 0; i != params->input_dim; ++i) {
params->input_size[i] = (int) input.sizes()[i];
params->weight_size[i] = (int) weight.sizes()[i];


@ -48,7 +48,7 @@ void setConvolutionParams(
ConvolutionParams* params,
const at::Tensor& input, const at::Tensor& weight,
IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation,
int64_t groups, bool deterministic, bool allow_tf32);
int64_t groups, bool deterministic, bool allow_tf32, at::MemoryFormat memory_format);
std::string repro_from_args(const ConvolutionParams& args);


@ -628,8 +628,8 @@ void raw_cudnn_convolution_forward_out_32bit(
ConvolutionArgs args{ input, output, weight };
args.handle = getCudnnHandle();
setConvolutionParams(&args.params, input, weight, padding, stride, dilation, groups, deterministic, allow_tf32);
at::MemoryFormat memory_format = cudnn_conv_suggest_memory_format(input, weight);
setConvolutionParams(&args.params, input, weight, padding, stride, dilation, groups, deterministic, allow_tf32, memory_format);
args.idesc.set(input, memory_format);
args.wdesc.set(weight, memory_format, 0);
args.odesc.set(output, memory_format);
@ -692,8 +692,8 @@ void raw_cudnn_convolution_backward_input_out_32bit(
ConvolutionArgs args{ grad_input, grad_output, weight };
args.handle = getCudnnHandle();
setConvolutionParams(&args.params, grad_input, weight, padding, stride, dilation, groups, deterministic, allow_tf32);
at::MemoryFormat memory_format = cudnn_conv_suggest_memory_format(grad_input, weight);
setConvolutionParams(&args.params, grad_input, weight, padding, stride, dilation, groups, deterministic, allow_tf32, memory_format);
args.idesc.set(grad_input, memory_format);
args.wdesc.set(weight, memory_format, 0);
args.odesc.set(grad_output, memory_format);
@ -755,8 +755,8 @@ void raw_cudnn_convolution_backward_weight_out_32bit(
ConvolutionArgs args{ input, grad_output, grad_weight };
args.handle = getCudnnHandle();
setConvolutionParams(&args.params, input, grad_weight, padding, stride, dilation, groups, deterministic, allow_tf32);
at::MemoryFormat memory_format = cudnn_conv_suggest_memory_format(input, grad_weight);
setConvolutionParams(&args.params, input, grad_weight, padding, stride, dilation, groups, deterministic, allow_tf32, memory_format);
args.idesc.set(input, memory_format);
args.wdesc.set(grad_weight, memory_format, 0);
args.odesc.set(grad_output, memory_format);
@ -868,6 +868,7 @@ void raw_cudnn_convolution_add_relu_out_v7(
auto dataType = getCudnnDataType(input);
ConvolutionArgs args{input, output, weight};
args.handle = getCudnnHandle();
at::MemoryFormat memory_format = cudnn_conv_suggest_memory_format(input, weight);
setConvolutionParams(
&args.params,
input,
@ -877,8 +878,8 @@ void raw_cudnn_convolution_add_relu_out_v7(
dilation,
groups,
deterministic,
allow_tf32);
at::MemoryFormat memory_format = cudnn_conv_suggest_memory_format(input, weight);
allow_tf32,
memory_format);
args.idesc.set(input, memory_format);
args.wdesc.set(weight, memory_format, 0);
args.odesc.set(output, memory_format);


@ -152,7 +152,7 @@ BenchmarkCache<cudnn_frontend::ExecutionPlan, CacheKeyFused> benchmark_cache_fus
// would not be a POD anymore.
void setCacheKey(CacheKey& key, const cudnnBackendDescriptorType_t operation, const Tensor& y, const Tensor& x, const Tensor& w, const IntArrayRef padding, const IntArrayRef stride, const IntArrayRef dilation, int64_t groups, bool deterministic, bool allow_tf32) {
memset(&key, 0, sizeof(key));
setConvolutionParams(&key.params, x, w, padding, stride, dilation, groups, deterministic, allow_tf32);
setConvolutionParams(&key.params, x, w, padding, stride, dilation, groups, deterministic, allow_tf32, x.suggest_memory_format());
key.operation = operation;
key.x_alignment = getAlignment(x);
key.y_alignment = getAlignment(y);
@ -161,7 +161,7 @@ void setCacheKey(CacheKey& key, const cudnnBackendDescriptorType_t operation, co
void setCacheKeyFused(CacheKeyFused& key, const Tensor& y, const Tensor& x, const Tensor& w, const Tensor& z, const Tensor& b, const float alpha, const IntArrayRef padding, const IntArrayRef stride, const IntArrayRef dilation, int64_t groups, bool deterministic, bool allow_tf32) {
memset(&key, 0, sizeof(key));
setConvolutionParams(&key.params, x, w, padding, stride, dilation, groups, deterministic, allow_tf32);
setConvolutionParams(&key.params, x, w, padding, stride, dilation, groups, deterministic, allow_tf32, x.suggest_memory_format());
key.x_alignment = getAlignment(x);
key.y_alignment = getAlignment(y);
key.w_alignment = getAlignment(w);
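
Threading `memory_format` through `setConvolutionParams` matters because `ConvolutionParams` is `memset` to zero and then used as a byte-wise cache key (hence the "would not be a POD anymore" comment above): every input that influences algorithm choice must live inside the struct, and the struct must stay trivially copyable so padding bytes are deterministic. A reduced sketch of that byte-hashed key pattern (the field set here is invented):

```
#include <cstdint>
#include <cstring>
#include <unordered_map>

struct ParamsKey {        // trivially copyable: no pointers, no virtuals
  int64_t input_size[4];
  int64_t groups;
  int8_t memory_format;   // the field the diff adds to the real key
};

struct ParamsHash {
  size_t operator()(const ParamsKey& k) const {
    // FNV-1a over the raw bytes; only safe because the whole struct is
    // memset first, making padding bytes deterministic.
    const auto* p = reinterpret_cast<const unsigned char*>(&k);
    size_t h = 14695981039346656037ull;
    for (size_t i = 0; i < sizeof(ParamsKey); ++i) {
      h ^= p[i];
      h *= 1099511628211ull;
    }
    return h;
  }
};

struct ParamsEqual {
  bool operator()(const ParamsKey& a, const ParamsKey& b) const {
    return std::memcmp(&a, &b, sizeof(ParamsKey)) == 0;
  }
};

int main() {
  std::unordered_map<ParamsKey, int, ParamsHash, ParamsEqual> cache;
  ParamsKey k;
  std::memset(&k, 0, sizeof(k));  // zero padding before filling fields
  k.groups = 1;
  k.memory_format = 2;
  cache[k] = 42;
  return cache.at(k) == 42 ? 0 : 1;
}
```
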


@ -47,23 +47,26 @@ MPSDataType getMPSDataType(ScalarType scalar_type);
MPSDataType getMPSScalarType(ScalarType scalar_type);
std::string getMPSTypeString(ScalarType scalar_type);
std::string getMPSShapeString(MPSShape* shape);
std::string getTensorsStringKey(const TensorList& tensors);
std::string getTensorsStringKey(const TensorList& tensors, bool use_scalar_value = true);
double getMPSScalarValue(const Tensor& t);
std::string getArrayRefString(const IntArrayRef s);
std::string getStridedKey(const Tensor& self, const IntArrayRef sz,
const IntArrayRef strides, int64_t offset);
id<MTLBuffer> gatherViewTensor(const at::Tensor& src, id<MTLBuffer> s);
// use has_storage() on the returned tensor to determine if src actually is a view
Tensor gatherViewTensor(const at::Tensor& src, at::Tensor& dst);
Tensor& scatterViewTensor(const at::Tensor& src, at::Tensor& output);
MPSShape* getMPSShape(const Tensor& t);
MPSShape* getMPSShape(IntArrayRef sizes);
MPSShape* getMPSShape(c10::MaybeOwned<Tensor> t);
static inline id<MTLBuffer> getMTLBufferStorage(const at::Tensor& tensor) {
return __builtin_bit_cast(id<MTLBuffer>, tensor.storage().data());
}
class Placeholder {
public:
Placeholder() : _placeholder(nullptr), _value(nullptr) {}
Placeholder(MPSGraphTensor* mpsGraphTensor) : _placeholder(mpsGraphTensor), _value(nullptr) {}
Placeholder(MPSGraphTensor* mpsGraphTensor, const Tensor& self, MPSShape *mpsShape = nullptr,
bool check_view = true);
Placeholder() : _placeholder(nullptr), _value(nullptr), _tensor(Tensor()) {}
Placeholder(MPSGraphTensor* mpsGraphTensor) : _placeholder(mpsGraphTensor), _value(nullptr), _tensor(Tensor()) {}
Placeholder(MPSGraphTensor* mpsGraphTensor, const Tensor& self, MPSShape *mpsShape = nullptr);
MPSGraphTensor* getMPSGraphTensor() {
return _placeholder;
}
@ -77,13 +80,14 @@ class Placeholder {
private:
MPSGraphTensor* _placeholder;
MPSGraphTensorData* _value;
Tensor _tensor;
};
void resize_tensor(Tensor* output);
MPSGraphTensor* trunc_tensor(MPSGraph* mpsGraph, MPSGraphTensor* inputTensor);
MPSGraphTensorData *getMPSGraphTensorData(MPSGraph* mpsGraph,
MPSStream* mpsStream,
const Tensor& tensor);
MPSGraphTensor* castMPSTensor(MPSGraph *mpsGraph, MPSGraphTensor* tensor, ScalarType toType);
MPSGraphTensorData *getMPSGraphTensorData(MPSGraph* mpsGraph, MPSStream* mpsStream, const Tensor& tensor);
MPSGraphTensorData* getMPSGraphTensorFromScalar(MPSStream* mpsStream, const Scalar& scalar, MPSDataType dataType);
MPSGraph* make_mps_graph();
void printTensorNDArray(const Tensor& t);
@ -91,12 +95,10 @@ void printTensorNDArray(const Tensor& t);
MPSGraphTensor* mpsGraphUnrankedPlaceHolder(MPSGraph *mpsGraph, MPSDataType dataType);
MPSGraphTensor* mpsGraphRankedPlaceHolder(MPSGraph *mpsGraph, MPSDataType dataType, MPSShape* mpsShape);
MPSGraphTensor* mpsGraphRankedPlaceHolder(MPSGraph *mpsGraph, const Tensor& tensor);
MPSGraphTensor* mpsGraphConstantFloatPlaceHolder(MPSGraph *mpsGraph, const double value, MPSShape* mpsShape);
MPSGraphTensor* mpsGraphConstantPlaceHolder(MPSGraph *mpsGraph, const double value, MPSShape* mpsShape, MPSDataType dataType);
string get_mem_format_string(c10::MemoryFormat memory_format);
using MPSCacheKey = int64_t;
using MPSCacheKey = uint64_t;
// derive this class to cache a graph and its inputs/outputs
// can be used to store any NSObject
@ -107,16 +109,29 @@ struct MPSCachedGraph
[_object release];
_object = nullptr;
}
template<typename T>
inline T* as() {
return static_cast<T*>(this);
}
MPSGraph *graph() const { return (MPSGraph *)_object; }
NSObject *object() const { return _object; }
private:
NSObject *_object = nullptr;
};
struct MPSUnaryCachedGraph : public MPSCachedGraph
{
MPSUnaryCachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
// TODO: Improve the overall design of MPSGraphCache.
// https://github.com/pytorch/pytorch/issues/77176
// Cache holding various keys mapped to graphs
struct MPSGraphCache
{
typedef MPSCachedGraph * (^CreateCachedGraphBlock)();
@ -148,7 +163,7 @@ struct MPSGraphCache
MPSGraphCache(const MPSGraphCache&) = delete;
void operator=(const MPSGraphCache&) = delete;
MPSCachedGraph* CreateCachedGraph(const std::string& key, CreateCachedGraphBlock createCacheBlock) {
MPSCachedGraph* CreateCachedGraph(const std::string& key, CreateCachedGraphBlock createCacheBlock, void* view_ptr = nullptr) {
__block MPSCachedGraph * result = nil;
@ -166,6 +181,9 @@ struct MPSGraphCache
result = createCacheBlock();
CacheEntry entry(key, result);
cache_.emplace(hash, entry);
if (view_ptr) {
views_list.insert(std::make_pair(view_ptr, hash));
}
}
});
return result;
@ -187,6 +205,30 @@ struct MPSGraphCache
});
return result;
}
template<typename T>
inline T* LookUpAs(const std::string& key) const {
return static_cast<T *>(LookUp(key));
}
void FindAndRemoveViewEntry(void* ptr) {
// this may find multiple view entries with the same buffer pointers
auto views_range = views_list.equal_range(ptr);
if (views_range.first == views_range.second)
return;
for (auto view_it = views_range.first; view_it != views_range.second; ++view_it) {
MPSCacheKey hash = view_it->second;
// find the cache entry associated with the hash
auto cache_it = cache_.find(hash);
if (cache_it != cache_.end()) {
delete cache_it->second.cachedGraph_;
cache_.erase(cache_it);
}
}
// this erase-by-key will remove all pairs in the list with the same key
views_list.erase(ptr);
}
private:
MPSGraphCache() {
serialQueue_ = dispatch_queue_create("cache queue", DISPATCH_QUEUE_SERIAL);
@ -194,6 +236,9 @@ struct MPSGraphCache
static MPSGraphCache* _instance_cache;
std::unordered_map<MPSCacheKey, CacheEntry> cache_;
// list of buffers associated with view entries in the cache
// note that multiple view cache entries could use the same buffer pointer
std::unordered_multimap<void*, MPSCacheKey> views_list;
dispatch_queue_t serialQueue_ = nullptr;
};
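
`views_list` is a multimap because several cached view graphs can hang off one buffer pointer; invalidation walks `equal_range` for the pointer and then erases every pair by key. A tiny standalone sketch of that pattern:

```
#include <cstdio>
#include <unordered_map>

int main() {
  // buffer pointer -> cache hash; one buffer may back several view graphs
  std::unordered_multimap<void*, unsigned long> views_list;
  int buf = 0;
  views_list.insert({&buf, 101});
  views_list.insert({&buf, 202});

  // Invalidate everything keyed on &buf, as FindAndRemoveViewEntry does.
  auto range = views_list.equal_range(&buf);
  for (auto it = range.first; it != range.second; ++it)
    std::printf("dropping cached graph with hash %lu\n", it->second);
  views_list.erase(&buf);  // erase-by-key removes all matching pairs

  std::printf("remaining entries: %zu\n", views_list.size());  // 0
  return 0;
}
```
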


@ -1,6 +1,7 @@
// Copyright © 2022 Apple Inc.
#include <ATen/native/mps/OperationUtils.h>
#include <ATen/mps/MPSAllocator.h>
namespace at {
namespace native {
@ -25,7 +26,7 @@ MPSGeneratorImpl::MPSGeneratorImpl(DeviceIndex device_index)
}
const Generator& getDefaultMPSGenerator() {
auto gen = make_generator<MPSGeneratorImpl>(0);
static auto gen = make_generator<MPSGeneratorImpl>(0);
gen.seed();
return gen;
}
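
The one-word `static` above is load-bearing: `getDefaultMPSGenerator` returns a `const Generator&`, so without it the function handed back a reference to a local that was destroyed at return. A distilled version of the bug and the fix, with `std::string` standing in for the generator:

```
#include <string>

// Shape of the old code: returns a dangling reference (undefined behavior).
// const std::string& get_default_bad() {
//   auto gen = std::string("generator state");
//   return gen;  // gen is destroyed here
// }

// Fixed shape: constructed once on first use, outlives all callers.
const std::string& get_default_good() {
  static auto gen = std::string("generator state");
  return gen;
}

int main() {
  const std::string& g1 = get_default_good();
  const std::string& g2 = get_default_good();
  return (&g1 == &g2) ? 0 : 1;  // same underlying object every call
}
```
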
@ -38,20 +39,15 @@ c10::intrusive_ptr<c10::TensorImpl> MPSGeneratorImpl::get_state() const {
static const size_t total_size = seed_size + offset_size;
auto state_tensor = at::detail::empty_cpu({(int64_t)total_size}, ScalarType::Byte, c10::nullopt, c10::nullopt, c10::nullopt, c10::nullopt);
auto rng_state = state_tensor.data_ptr<uint8_t>();
return state_tensor.getIntrusivePtr();
}
void MPSGeneratorImpl::set_state(const c10::TensorImpl& new_state) {
static const size_t seed_size = sizeof(uint64_t);
static const size_t offset_size = sizeof(int64_t);
static const size_t total_size = seed_size + offset_size;
detail::check_rng_state(new_state);
auto new_state_size = new_state.numel();
uint64_t input_seed;
auto new_rng_state = new_state.data<uint8_t>();
memcpy(&input_seed, new_rng_state, seed_size);
@@ -64,22 +60,11 @@ MPSGeneratorImpl* MPSGeneratorImpl::clone_impl() const {
return gen;
}
std::string getStridedKey(const Tensor& self, const IntArrayRef sz,
const IntArrayRef strides, int64_t offset) {
// TODO: move storage_offset to a PlaceholderTensor and strides to a
// tensor too, to avoid too many cache entries.
return std::to_string((uintptr_t)self.storage().data()) +
":" + mps::getArrayRefString(sz) +
":" + mps::getArrayRefString(strides) +
":" + std::to_string(offset);
}
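`getStridedKey` above identifies a view by storage pointer plus sizes, strides, and offset, so each distinct view of the same buffer gets its own cache entry. An illustrative standalone rendering of that key format (helper names are hypothetical):

```
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

static std::string arrayRefString(const std::vector<int64_t>& v) {
  std::string s = "[";
  for (size_t i = 0; i < v.size(); i++)
    s += std::to_string(v[i]) + (i + 1 < v.size() ? "," : "");
  return s + "]";
}

// ptr:sizes:strides:offset, mirroring the concatenation above
static std::string stridedKey(const void* storage,
                              const std::vector<int64_t>& sizes,
                              const std::vector<int64_t>& strides,
                              int64_t offset) {
  return std::to_string((uintptr_t)storage) + ":" + arrayRefString(sizes) +
         ":" + arrayRefString(strides) + ":" + std::to_string(offset);
}

int main() {
  int buf[6] = {};
  // e.g. a transposed 2x3 view of a contiguous buffer
  std::printf("%s\n", stridedKey(buf, {3, 2}, {1, 3}, 0).c_str());
}
```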
void runMPSGraph(
MPSStream* mpsStream,
MPSGraph* mpsGraph,
NSDictionary* feeds,
NSDictionary* results) {
dispatch_sync(mpsStream->queue(), ^() {
@autoreleasepool {
mpsStream->commit(true);
@@ -113,12 +98,17 @@ MPSDataType getMPSDataType(ScalarType scalar_type) {
return MPSDataTypeInt64;
case ScalarType::Short:
return MPSDataTypeInt16;
case ScalarType::Byte:
case ScalarType::Char:
return MPSDataTypeInt8;
case ScalarType::Byte:
return MPSDataTypeUInt8;
case ScalarType::Bool:
return MPSDataTypeBool;
case ScalarType::Double:
TORCH_CHECK_TYPE(false, "Cannot convert a float64 Tensor to MPS as the MPS framework doesn't support float64. "
"Please use float32 instead.")
default:
TORCH_CHECK_TYPE(false, "Trying to convert ", scalar_type, " to the MPS backend but there is no mapping for it.")
TORCH_CHECK_TYPE(false, "Trying to convert ", scalar_type, " to the MPS backend but it does not have support for that dtype.")
}
}
@@ -137,12 +127,14 @@ MPSDataType getMPSScalarType(ScalarType scalar_type) {
return MPSDataTypeInt64;
case ScalarType::Short:
return MPSDataTypeInt16;
case ScalarType::Byte:
case ScalarType::Char:
return MPSDataTypeInt8;
case ScalarType::Byte:
return MPSDataTypeUInt8;
case ScalarType::Bool:
return MPSDataTypeBool;
default:
TORCH_INTERNAL_ASSERT(false, "Trying to convert ", scalar_type, " to the MPS backend but there is no mapping for it.")
TORCH_CHECK_TYPE(false, "Trying to convert ", scalar_type, " to the MPS backend but it does not have support for that dtype.")
}
}
@@ -182,7 +174,7 @@ std::string getArrayRefString(const IntArrayRef s) {
return ss.str();
}
std::string getTensorsStringKey(const TensorList& tensors) {
std::string getTensorsStringKey(const TensorList& tensors, bool use_scalar_value) {
std::string str;
// The key format per tensor would look like ":MPSDataTypeFloat32[1,1,1,10]:"
for (const Tensor& tensor: tensors) {
@@ -191,7 +183,7 @@ std::string getTensorsStringKey(const TensorList& tensors) {
str += getMPSTypeString(tensor.scalar_type()) + "[";
// if tensor is a scalar
if (tensor.dim() == 0) {
str += std::to_string(getMPSScalarValue(tensor));
str += (use_scalar_value ? std::to_string(getMPSScalarValue(tensor)) : "Scalar");
} else {
const NSString* ns_shape_key = [[getMPSShape(tensor) valueForKey:@"description"] componentsJoinedByString:@","];
str += std::string(ns_shape_key.UTF8String);
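The new `use_scalar_value` flag decides whether a 0-dim tensor contributes its numeric value to the cache key. Keying on the value is only needed while scalars are baked into the graph as constants; once they are fed at run time (see `getMPSGraphTensorFromScalar` below), a generic "Scalar" marker avoids creating one cache entry per value. A toy sketch:

```
#include <cstdio>
#include <string>

// hypothetical stand-in for one tensor's fragment of the cache key
std::string tensorKey(bool is_scalar, double value, bool use_scalar_value) {
  if (!is_scalar) return "Float32[1,10]";
  // per-value keys grow the cache unboundedly; "Scalar" shares one graph
  return use_scalar_value ? std::to_string(value) : "Scalar";
}

int main() {
  std::printf("%s\n", tensorKey(true, 3.5, true).c_str());   // "3.500000"
  std::printf("%s\n", tensorKey(true, 3.5, false).c_str());  // "Scalar"
}
```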
@@ -249,107 +241,48 @@ MPSShape* getMPSShape(IntArrayRef sizes) {
void printTensorNDArray(const Tensor& t) {
if (!t.is_mps()) return;
if(t.numel() == 0)
{
std::cout << "Empty tensor" << std::endl;
return;
}
if(t.numel() == 0) return;
// Get shape and data type
auto selfShape = getMPSShape(t);
auto selfDType = getMPSDataType(t.scalar_type());
// Initialize data
id<MTLBuffer> selfBuf = __builtin_bit_cast(id<MTLBuffer>, t.storage().data());
MPSGraphTensorData* tdata = [[MPSGraphTensorData alloc] initWithMTLBuffer:selfBuf
id<MTLBuffer> selfBuf = getMTLBufferStorage(t);
MPSGraphTensorData* tdata = [[[MPSGraphTensorData alloc] initWithMTLBuffer:selfBuf
shape:selfShape
dataType:selfDType];
dataType:selfDType] autorelease];
[tdata printNDArray];
}
id<MTLBuffer> gatherViewTensor(const at::Tensor& src, id<MTLBuffer> sourceBuffer) {
assert (!src.is_contiguous());
id<MTLDevice> device = MPSDevice::getInstance()->device();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* inputTensor_ = nil;
MPSGraphTensor* outputTensor_ = nil;
IntArrayRef size_;
IntArrayRef stride_;
int64_t storage_offset_;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
string key = getStridedKey(src, src.sizes(), src.strides(), src.storage_offset());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if (cachedGraph) {
@autoreleasepool {
MPSGraphTensor* inputTensor = cachedGraph->inputTensor_;
auto output = at::native::empty_mps(
src.sizes(),
src.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
MPSGraphTensorData* inputTensorData = [[MPSGraphTensorData alloc] initWithMTLBuffer: sourceBuffer
shape: [inputTensor shape]
dataType: [inputTensor dataType]];
id<MTLBuffer> resultBuffer = __builtin_bit_cast(id<MTLBuffer>, output.storage().data());
MPSGraphTensorData* outputTensorData = [[MPSGraphTensorData alloc] initWithMTLBuffer: resultBuffer
shape: getMPSShape(src.sizes())
dataType: getMPSDataType(src.scalar_type())];
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
inputTensor : inputTensorData
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
cachedGraph->outputTensor_ : outputTensorData
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
return resultBuffer;
}
}
}
return nil;
}
Placeholder::Placeholder(MPSGraphTensor* mpsGraphTensor, const Tensor& src,
MPSShape *mpsShape, bool check_view)
Placeholder::Placeholder(MPSGraphTensor* mpsGraphTensor, const Tensor& src, MPSShape *mpsShape) : _tensor(src)
{
Tensor src_ = src;
TORCH_CHECK(src_.is_mps(), "Placeholder storage has not been allocated on MPS device!");
// extract the pointer to MTLBuffer from the Tensor's storage
id<MTLBuffer> srcBuf = __builtin_bit_cast(id<MTLBuffer>, src.storage().data());
if (check_view && !src.is_contiguous()) {
id<MTLBuffer> gatherTensor = gatherViewTensor(src, srcBuf);
if (gatherTensor) {
srcBuf = gatherTensor;
} else {
src_ = src.contiguous();
srcBuf = __builtin_bit_cast(id<MTLBuffer>, src_.storage().data());
TORCH_CHECK(src.is_mps(), "Placeholder storage has not been allocated on MPS device!");
// extract the pointer to MTLBuffer from the Tensor's storage
id<MTLBuffer> srcBuf = getMTLBufferStorage(src);
// a view tensor could be contiguous (e.g., slice ops) or non-contiguous (e.g., transpose())
if (src.is_view() || !src.is_contiguous()) {
Tensor emptyShell = Tensor();
// use "_tensor" from Placeholder to retain view's output during its usage in other ops
_tensor = gatherViewTensor(src, emptyShell);
if (!_tensor.has_storage()) {
// if we cannot gather, we make the tensor contiguous implicitly, and keep
// it in placeholder to be able to retrieve it when we return from constructor
_tensor = src.contiguous();
}
srcBuf = getMTLBufferStorage(_tensor);
}
const size_t buf_size = [srcBuf length];
// tensor.numel() could be zero, but tensor is valid as long as the buffer size is non-zero.
// if buf_size is zero in here, it's not a user error. It could be a missing check for
// if buffer size is zero in here, it's not a user error. It could be a missing check for
// tensor.numel() == 0 in our internal implementations of ops.
TORCH_INTERNAL_ASSERT(buf_size > 0, "Placeholder tensor is empty!");
TORCH_INTERNAL_ASSERT([srcBuf length] > 0, "Placeholder tensor is empty!");
TORCH_CHECK(src_.storage().nbytes() <= buf_size, "Placeholder buffer size (", buf_size,
") is not large enough to contain the Tensor storage of size ", src_.storage().nbytes());
const MPSDataType mpsDataType = getMPSDataType(src_.scalar_type());
const MPSDataType mpsDataType = _tensor.dim() == 0 ? getMPSScalarType(_tensor.scalar_type()) : getMPSDataType(_tensor.scalar_type());
if (!mpsShape)
mpsShape = getMPSShape(src_);
mpsShape = getMPSShape(_tensor);
_value = [[MPSGraphTensorData alloc] initWithMTLBuffer:srcBuf
shape:mpsShape
dataType:mpsDataType];
_value = [[[MPSGraphTensorData alloc] initWithMTLBuffer:srcBuf
shape:mpsShape
dataType:mpsDataType] autorelease];
TORCH_INTERNAL_ASSERT(_value);
_placeholder = mpsGraphTensor;
}
@@ -362,7 +295,7 @@ MPSGraphTensorData *getMPSGraphTensorData(MPSGraph* mpsGraph,
MPSGraphTensorData *result = nil;
if (tensor.numel() > 0) {
id<MTLBuffer> buf = __builtin_bit_cast(id<MTLBuffer>, tensor.storage().data());
id<MTLBuffer> buf = getMTLBufferStorage(tensor);
result = [[[MPSGraphTensorData alloc] initWithMTLBuffer:buf
shape:mpsShape
dataType:dataType]
@@ -379,6 +312,46 @@ MPSGraphTensorData *getMPSGraphTensorData(MPSGraph* mpsGraph,
return result;
}
MPSGraphTensorData* getMPSGraphTensorFromScalar(MPSStream* mpsStream, const Scalar& scalar, MPSDataType dataType) {
union {
float f; // MPS doesn't support 'double'
at::Half h;
int64_t i;
bool b;
} v;
switch (dataType) {
case MPSDataTypeFloat32:
v.f = scalar.to<float>();
break;
case MPSDataTypeFloat16:
v.h = scalar.to<at::Half>();
break;
case MPSDataTypeInt64:
v.i = scalar.to<int64_t>();
break;
case MPSDataTypeInt32:
v.i = scalar.to<int32_t>();
break;
case MPSDataTypeInt16:
v.i = scalar.to<int16_t>();
break;
case MPSDataTypeInt8:
v.i = scalar.to<int8_t>();
break;
case MPSDataTypeBool:
v.b = scalar.to<bool>();
break;
default:
TORCH_INTERNAL_ASSERT(false, "Unsupported scalar type on MPS backend.")
}
MPSNDArrayDescriptor *tensorDesc = [MPSNDArrayDescriptor descriptorWithDataType:dataType shape:@[@1]];
MPSNDArray *tensorNDArray = [[[MPSNDArray alloc] initWithDevice:mpsStream->device() descriptor:tensorDesc] autorelease];
[tensorNDArray writeBytes:&v strideBytes:nil];
MPSGraphTensorData* result = [[[MPSGraphTensorData alloc] initWithMPSNDArray:tensorNDArray] autorelease];
return result;
}
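`getMPSGraphTensorFromScalar` above packs the scalar's bytes into a union and writes them into a 1-element `MPSNDArray`, so the same cached graph can be re-fed with different scalar values. A sketch of just the packing step in plain C++ (the MPS calls are omitted):

```
#include <cstdint>
#include <cstdio>

enum class DType { Float32, Int64, Bool };

union ScalarBits {
  float f;
  int64_t i;
  bool b;
};

// returns the bytes that would be written into the 1-element NDArray
ScalarBits pack(DType t, double value) {
  ScalarBits v{};
  switch (t) {
    case DType::Float32: v.f = (float)value; break;  // MPS has no float64
    case DType::Int64:   v.i = (int64_t)value; break;
    case DType::Bool:    v.b = value != 0.0; break;
  }
  return v;
}

int main() {
  ScalarBits v = pack(DType::Float32, 2.5);
  std::printf("%f\n", v.f);  // 2.500000
}
```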
void resize_tensor(Tensor* output) {
output->resize_(output->sizes());
}
@@ -389,24 +362,6 @@ MPSGraph* make_mps_graph() {
return mpsGraph;
}
MPSGraphTensor* mpsGraphConstantFloatPlaceHolder(MPSGraph *mpsGraph, const double value, MPSShape* mpsShape) {
// "value" is always double, so is the Placeholder's type (we only support Float32).
return [mpsGraph constantWithScalar:value
shape:mpsShape
dataType:MPSDataTypeFloat32];
}
MPSGraphTensor* mpsGraphConstantPlaceHolder(MPSGraph *mpsGraph, const double value, MPSShape* mpsShape, MPSDataType dataType) {
// Bool is not handled by constantWithScalar
MPSGraphTensor* constPlaceHolder = [mpsGraph constantWithScalar:value
shape:mpsShape
dataType:(dataType == MPSDataTypeBool ? MPSDataTypeFloat32 : dataType)];
if (dataType == MPSDataTypeBool)
return [mpsGraph castTensor:constPlaceHolder toType:MPSDataTypeBool name:@"ConstantBoolTensor"];
return constPlaceHolder;
}
MPSGraphTensor* mpsGraphUnrankedPlaceHolder(MPSGraph *mpsGraph, MPSDataType dataType) {
return [mpsGraph placeholderWithShape:nil
dataType:dataType
@@ -421,10 +376,15 @@ MPSGraphTensor* mpsGraphRankedPlaceHolder(MPSGraph *mpsGraph, MPSDataTy
MPSGraphTensor* mpsGraphRankedPlaceHolder(MPSGraph *mpsGraph, const Tensor& tensor) {
return [mpsGraph placeholderWithShape:getMPSShape(tensor)
dataType:getMPSDataType(tensor.scalar_type())
dataType:getMPSScalarType(tensor.scalar_type())
name:nil];
}
// this is meant to suppress the availability warning on castTensor
// we pass ScalarType instead of MPSDataType to handle MPSDataTypeBool's availability too
MPSGraphTensor* castMPSTensor(MPSGraph *mpsGraph, MPSGraphTensor* tensor, ScalarType toType) {
return [mpsGraph castTensor:tensor toType:getMPSScalarType(toType) name:@"castTensor"];
}
string get_mem_format_string(c10::MemoryFormat memory_format) {
string mem_format_key;
@@ -444,6 +404,17 @@ string get_mem_format_string(c10::MemoryFormat memory_format) {
MPSGraphCache* MPSGraphCache::_instance_cache = nullptr;
class MPSGraphCacheCallback : public IMpsAllocatorCallback {
public:
MPSGraphCacheCallback() : graph_cache(MPSGraphCache::getInstance()) { }
void executeMPSAllocatorCallback(void* ptr, EventType event) override { }
private:
MPSGraphCache* graph_cache;
};
REGISTER_MPS_ALLOCATOR_CALLBACK("mps_graph_cache_callback", MPSGraphCacheCallback);
} // namespace mps
} // namespace native
} // namespace at

View File

@@ -18,24 +18,18 @@ namespace native {
Tensor relu_mps(const Tensor& self) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;
Tensor output = at::empty_like(self);
resize_tensor(&output);
TORCH_CHECK(output.is_mps());
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = "relu" + getTensorsStringKey({self});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
@@ -79,24 +73,18 @@ Tensor relu_mps(const Tensor& self) {
Tensor & relu_mps_(Tensor & self) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;
// Inplace relu
Tensor &output = self;
TORCH_CHECK(output.is_mps());
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = "relu_" + getTensorsStringKey({self});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
@@ -141,15 +129,9 @@ Tensor & relu_mps_(Tensor & self) {
TORCH_IMPL_FUNC(leaky_relu_out_mps) (
const Tensor& self, const Scalar& negative_slope, const Tensor& output) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;
TORCH_CHECK(output.is_mps());
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream *stream = getCurrentMPSStream();
@@ -157,7 +139,7 @@ TORCH_IMPL_FUNC(leaky_relu_out_mps) (
@autoreleasepool {
string key = "leaky_relu" + getTensorsStringKey({self}) + ":" + to_string(negative_slope.to<double>());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
@@ -185,7 +167,7 @@ TORCH_IMPL_FUNC(leaky_relu_out_mps) (
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<CachedGraph>();
}
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
@@ -296,25 +278,19 @@ TORCH_IMPL_FUNC(log_softmax_mps_out) (
const bool half_to_float,
const Tensor &out) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;
if (self.numel() == 0) {
return;
}
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* inputTensor_ = nil;
MPSGraphTensor* outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = at::mps::getCurrentMPSStream();
@autoreleasepool {
string key = "log_softmax_mps_out" + getTensorsStringKey({self}) + ":" + to_string(dim);
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
@@ -438,22 +414,16 @@ TORCH_IMPL_FUNC(sigmoid_out_mps)(
const Tensor& self,
const Tensor& output) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;
TORCH_CHECK(output.is_mps());
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = "sigmoid_out_mps" + getTensorsStringKey({self});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
@@ -651,15 +621,9 @@ TORCH_IMPL_FUNC(threshold_out_mps)(
const Scalar& value,
const Tensor& result) {
using namespace mps;
using CachedGraph = MPSUnaryCachedGraph;
TORCH_CHECK(self.is_mps());
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@@ -669,7 +633,7 @@ TORCH_IMPL_FUNC(threshold_out_mps)(
to_string(threshold.to<double>()) + ":" +
to_string(value.to<double>());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
@@ -871,7 +835,7 @@ TORCH_IMPL_FUNC(gelu_out_mps) (
getMPSDataType(self.scalar_type()),
getMPSShape(self));
MPSGraphTensor* outputTensor = normcdf(mpsGraph, inputTensor);
outputTensor = [mpsGraph multiplicationWithPrimaryTensor:outputTensor
secondaryTensor:inputTensor
name:nil];
@@ -1266,6 +1230,409 @@ TORCH_IMPL_FUNC(elu_backward_out_mps) (
}
TORCH_IMPL_FUNC(glu_out_mps) (
const Tensor& self, const int64_t dim, const Tensor& output
) {
using namespace mps;
TORCH_CHECK(output.is_mps());
// Empty output
if(output.numel() == 0)
return;
// this can't pass anyway because a 0-dimensional tensor has "size" 1, which
// can't be evenly halved, but give a nicer error message here.
TORCH_CHECK(self.dim() > 0, "glu does not support 0-dimensional tensors");
auto wrap_dim = maybe_wrap_dim(dim, self.dim());
const int64_t nIn = self.size(wrap_dim);
TORCH_CHECK(nIn % 2 == 0, "Halving dimension must be even, but dimension ",
wrap_dim, " is size ", nIn);
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = "glu_out_mps" + getTensorsStringKey({self}) + ":" + to_string(dim);;
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph,
getMPSDataType(self.scalar_type()),
getMPSShape(self));
NSArray<MPSGraphTensor *> * outputTensorsArray = [mpsGraph splitTensor:inputTensor
numSplits:2
axis:wrap_dim
name:nil];
MPSGraphTensor* firstHalf = outputTensorsArray[0];
MPSGraphTensor* secondHalf = [mpsGraph sigmoidWithTensor:outputTensorsArray[1]
name:nil];
MPSGraphTensor* outputTensor = [mpsGraph multiplicationWithPrimaryTensor:firstHalf
secondaryTensor:secondHalf
name:nil];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, output);
// Create dictionary of inputs and outputs
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
}
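The graph assembled above is the textbook GLU: split the input in half along `dim`, then gate the first half with the sigmoid of the second, out = a * sigmoid(b). A scalar-level sketch:

```
#include <cmath>
#include <cstdio>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

// GLU on a single (a, b) pair taken from the two halves of the input
double glu(double a, double b) { return a * sigmoid(b); }

int main() {
  // e.g. input [2.0, 0.0] split into a = 2.0, b = 0.0
  std::printf("%f\n", glu(2.0, 0.0));  // 1.000000 (sigmoid(0) = 0.5)
}
```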
Tensor& glu_backward_mps_out (
const Tensor& grad_output, const Tensor& self, const int64_t dim, Tensor& grad_input
) {
using namespace mps;
// Empty output
if(grad_input.numel() == 0)
return grad_input;
// this can't pass anyway because a 0-dimensional tensor has "size" 1, which
// can't be evenly halved, but give a nicer error message here.
TORCH_CHECK(self.dim() > 0, "glu does not support 0-dimensional tensors");
auto wrap_dim = maybe_wrap_dim(dim, self.dim());
const int64_t nIn = self.size(wrap_dim);
TORCH_CHECK(nIn % 2 == 0, "Halving dimension must be even, but dimension ",
wrap_dim, " is size ", nIn);
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *gradOutputTensor_ = nil;
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *gradInputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = "glu_backward_mps_out" + getTensorsStringKey({grad_output, self}) + ":" + to_string(dim);
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph,
getMPSDataType(self.scalar_type()),
getMPSShape(self));
MPSGraphTensor* gradOutputTensor = mpsGraphRankedPlaceHolder(mpsGraph,
getMPSDataType(grad_output.scalar_type()),
getMPSShape(grad_output));
NSArray<MPSGraphTensor *> * inputTensorsArray = [mpsGraph splitTensor:inputTensor
numSplits:2
axis:wrap_dim
name:nil];
// first half
MPSGraphTensor* sigmoidOutputTensor = [mpsGraph sigmoidWithTensor:inputTensorsArray[1]
name:nil];
MPSGraphTensor* firstHalfOutputTensor = [mpsGraph multiplicationWithPrimaryTensor : sigmoidOutputTensor
secondaryTensor : gradOutputTensor
name : nil];
// second half
MPSGraphTensor* one_val = [mpsGraph constantWithScalar:1.0
shape:@[@1]
dataType:getMPSDataType(self.scalar_type())];
MPSGraphTensor* secondHalfOutputTensor = [mpsGraph subtractionWithPrimaryTensor : one_val
secondaryTensor : sigmoidOutputTensor
name : nil];
secondHalfOutputTensor = [mpsGraph multiplicationWithPrimaryTensor : secondHalfOutputTensor
secondaryTensor : sigmoidOutputTensor
name : nil];
secondHalfOutputTensor = [mpsGraph multiplicationWithPrimaryTensor : secondHalfOutputTensor
secondaryTensor : inputTensorsArray[0]
name : nil];
secondHalfOutputTensor = [mpsGraph multiplicationWithPrimaryTensor : secondHalfOutputTensor
secondaryTensor : gradOutputTensor
name : nil];
MPSGraphTensor* outputTensor = [mpsGraph concatTensor : firstHalfOutputTensor
withTensor : secondHalfOutputTensor
dimension : wrap_dim
name : nil];
newCachedGraph->gradInputTensor_ = outputTensor;
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->gradOutputTensor_ = gradOutputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
Placeholder gradInputPlaceholder = Placeholder(cachedGraph->gradInputTensor_, grad_input);
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
Placeholder gradOutputPlaceholder = Placeholder(cachedGraph->gradOutputTensor_, grad_output);
// Create dictionary of inputs and outputs
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData(),
gradOutputPlaceholder.getMPSGraphTensor() : gradOutputPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
gradInputPlaceholder.getMPSGraphTensor() : gradInputPlaceholder.getMPSGraphTensorData(),
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
return grad_input;
}
Tensor glu_backward_mps (const Tensor& grad_output,
const Tensor& self,
const int64_t dim) {
Tensor grad_input = at::native::empty_mps(
self.sizes(),
self.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
grad_input = glu_backward_mps_out(grad_output, self, dim, grad_input);
return grad_input;
}
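The backward graph encodes the two analytic gradients of out = a * sigmoid(b): d(out)/da = sigmoid(b) and d(out)/db = a * sigmoid(b) * (1 - sigmoid(b)), each scaled by the incoming gradient before being concatenated back along `dim`. A scalar check against finite differences (illustrative):

```
#include <cmath>
#include <cstdio>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }
double glu(double a, double b) { return a * sigmoid(b); }

int main() {
  double a = 2.0, b = 0.3, g = 1.0, eps = 1e-6;
  // analytic gradients, as built in the graph above
  double da = g * sigmoid(b);
  double db = g * a * sigmoid(b) * (1.0 - sigmoid(b));
  // numeric gradients for comparison
  double nda = (glu(a + eps, b) - glu(a - eps, b)) / (2 * eps);
  double ndb = (glu(a, b + eps) - glu(a, b - eps)) / (2 * eps);
  std::printf("da %f vs %f, db %f vs %f\n", da, nda, db, ndb);
}
```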
TORCH_IMPL_FUNC(softplus_out_mps) (
const Tensor& self,
const Scalar& beta,
const Scalar& threshold,
const Tensor& result) {
using namespace mps;
TORCH_CHECK(self.is_mps());
// Applies the Softplus function :math:`\text{Softplus}(x) = \frac{1}{\beta} *
// \log(1 + \exp(\beta * x))` element-wise.
// For numerical stability the implementation reverts to the linear function
// when :math:`input \times \beta > threshold`.
// Empty output
if(result.numel() == 0)
return;
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = "softplus_out_mps:" + getTensorsStringKey({self});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor *inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
MPSGraphTensor *reluTensor = [mpsGraph reLUWithTensor:inputTensor
name:nil];
MPSGraphTensor* unitTensor = [mpsGraph constantWithScalar:1.0
shape:@[@1]
dataType:getMPSDataType(self.scalar_type())];
MPSGraphTensor* betaTensor = [mpsGraph constantWithScalar:beta.to<double>()
shape:@[@1]
dataType:getMPSDataType(self.scalar_type())];
MPSGraphTensor* reciprocalBetaTensor = [mpsGraph reciprocalWithTensor:betaTensor
name:nil];
MPSGraphTensor* bxTensor = [mpsGraph multiplicationWithPrimaryTensor:inputTensor
secondaryTensor:betaTensor
name:nil];
MPSGraphTensor* thresholdTensor = [mpsGraph constantWithScalar:threshold.to<double>()
shape:@[@1]
dataType:getMPSDataType(self.scalar_type())];
MPSGraphTensor* predicateTensor = [mpsGraph greaterThanWithPrimaryTensor:bxTensor
secondaryTensor:thresholdTensor
name:nil];
MPSGraphTensor* expTensor = [mpsGraph exponentWithTensor:bxTensor
name:nil];
MPSGraphTensor* expPlusOneTensor = [mpsGraph additionWithPrimaryTensor:expTensor
secondaryTensor:unitTensor
name:nil];
MPSGraphTensor* logTensor = [mpsGraph logarithmWithTensor:expPlusOneTensor
name:nil];
MPSGraphTensor* softplusTensor = [mpsGraph multiplicationWithPrimaryTensor:logTensor
secondaryTensor:reciprocalBetaTensor
name:nil];
MPSGraphTensor* outputTensor = [mpsGraph selectWithPredicateTensor:predicateTensor
truePredicateTensor:reluTensor
falsePredicateTensor:softplusTensor
name:nil];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, result);
// Create dictionary of inputs and outputs
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
}
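A plain C++ rendering of the select logic above: softplus(x) = log(1 + exp(beta*x)) / beta, reverting to the linear branch once beta*x exceeds the threshold so exp cannot overflow. This is a sketch, not the graph code; note that for beta*x > threshold > 0 the ReLU branch used in the graph reduces to x:

```
#include <cmath>
#include <cstdio>

double softplus(double x, double beta = 1.0, double threshold = 20.0) {
  double bx = beta * x;
  // for large bx, log1p(exp(bx))/beta ~= x, and exp(bx) would overflow
  if (bx > threshold) return x;
  return std::log1p(std::exp(bx)) / beta;
}

int main() {
  std::printf("%f\n", softplus(0.0));    // log(2) ~= 0.693147
  std::printf("%f\n", softplus(100.0));  // 100.000000 (linear branch)
}
```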
TORCH_IMPL_FUNC(softplus_backward_out_mps) (
const Tensor& grad_output,
const Tensor& self,
const Scalar& beta,
const Scalar& threshold,
const Tensor& grad_input
) {
using namespace mps;
TORCH_CHECK(self.is_mps());
// Empty output
if(grad_input.numel() == 0)
return;
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *gradOutputTensor_ = nil;
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = "softplus_backward_out_mps:" + getTensorsStringKey({grad_output, self});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor *gradOutputTensor = mpsGraphRankedPlaceHolder(mpsGraph, grad_output);
MPSGraphTensor *inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
MPSGraphTensor* unitTensor = [mpsGraph constantWithScalar:1.0
shape:@[@1]
dataType:getMPSDataType(self.scalar_type())];
MPSGraphTensor* betaTensor = [mpsGraph constantWithScalar:beta.to<double>()
shape:@[@1]
dataType:getMPSDataType(self.scalar_type())];
MPSGraphTensor* bxTensor = [mpsGraph multiplicationWithPrimaryTensor:inputTensor
secondaryTensor:betaTensor
name:nil];
MPSGraphTensor* expBxTensor = [mpsGraph exponentWithTensor:bxTensor
name:nil];
MPSGraphTensor* unitExpBxTensor = [mpsGraph additionWithPrimaryTensor:expBxTensor
secondaryTensor:unitTensor
name:nil];
MPSGraphTensor* rTensor = [mpsGraph multiplicationWithPrimaryTensor:gradOutputTensor
secondaryTensor:expBxTensor
name:nil];
rTensor = [mpsGraph divisionWithPrimaryTensor:rTensor
secondaryTensor:unitExpBxTensor
name:nil];
MPSGraphTensor* thresholdTensor = [mpsGraph constantWithScalar:threshold.to<double>()
shape:@[@1]
dataType:getMPSDataType(self.scalar_type())];
MPSGraphTensor* predicateTensor = [mpsGraph greaterThanWithPrimaryTensor:bxTensor
secondaryTensor:thresholdTensor
name:nil];
MPSGraphTensor* outputTensor = [mpsGraph selectWithPredicateTensor:predicateTensor
truePredicateTensor:gradOutputTensor
falsePredicateTensor:rTensor
name:nil];
newCachedGraph->gradOutputTensor_ = gradOutputTensor;
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
Placeholder gradOutputPlaceholder = Placeholder(cachedGraph->gradOutputTensor_, grad_output);
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
Placeholder gradInputPlaceholder = Placeholder(cachedGraph->outputTensor_, grad_input);
// Create dictionary of inputs and outputs
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
gradOutputPlaceholder.getMPSGraphTensor() : gradOutputPlaceholder.getMPSGraphTensorData(),
selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
gradInputPlaceholder.getMPSGraphTensor() : gradInputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
}
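The backward graph above computes g * exp(beta*x) / (1 + exp(beta*x)), i.e. g * sigmoid(beta*x), again passing the gradient through unchanged in the linear region. A scalar sketch:

```
#include <cmath>
#include <cstdio>

double softplus_grad(double g, double x, double beta = 1.0,
                     double threshold = 20.0) {
  double bx = beta * x;
  if (bx > threshold) return g;   // linear region: d/dx of x is 1
  double e = std::exp(bx);
  return g * e / (1.0 + e);       // g * sigmoid(beta * x)
}

int main() {
  std::printf("%f\n", softplus_grad(1.0, 0.0));  // 0.500000
}
```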
TORCH_IMPL_FUNC(silu_out_mps) (
const Tensor& self,
const Tensor& result) {

View File

@@ -26,6 +26,8 @@ void set_kernel_params
kernel_sizeW = isizeW - (osizeW-1) * strideW;
}
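The hunk above shows only the kernel-size line of `set_kernel_params`. A hedged standalone sketch of the whole computation, assuming the conventional stride = isize / osize (the stride lines are not visible in this hunk, so that part is an assumption):

```
#include <cstdio>

// sketch only: 1-D version with an assumed stride rule, not the real signature
void set_kernel_params_1d(int isize, int osize, int& stride, int& kernel) {
  stride = isize / osize;                   // assumed, not shown in the hunk
  kernel = isize - (osize - 1) * stride;    // matches the line in the diff
}

int main() {
  int s, k;
  set_kernel_params_1d(10, 5, s, k);
  std::printf("stride=%d kernel=%d\n", s, k);  // stride=2 kernel=2
}
```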
// Adaptive average pooling
Tensor& adaptive_avg_pool2d_out_mps
(const Tensor& input,
IntArrayRef output_size,
@@ -150,5 +152,93 @@ Tensor adaptive_avg_pool2d_backward_mps
}
// Adaptive max pooling
TORCH_IMPL_FUNC(adaptive_max_pool2d_out_mps)
(const Tensor& input,
IntArrayRef output_size,
const Tensor& output,
const Tensor& indices) {
for (int64_t i = 1; i < input.ndimension(); i++) {
TORCH_CHECK(input.size(i) > 0,
"adaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, "
"but input has sizes ", input.sizes(), " with dimension ", i, " being "
"empty");
}
int64_t isizeH = input.size(-2);
int64_t isizeW = input.size(-1);
int64_t osizeH = output_size[0];
int64_t osizeW = output_size[1];
if(input.suggest_memory_format() == at::MemoryFormat::ChannelsLast)
TORCH_CHECK(input.ndimension() == 4,
"adaptive_avg_pool2d(): Expected 4D tensor, but got ",
input.sizes())
switch (input.suggest_memory_format()) {
case at::MemoryFormat::Contiguous:
case at::MemoryFormat::ChannelsLast:
break;
default:
TORCH_CHECK(
false,
"Unsupported memory format. Supports only ChannelsLast, Contiguous")
}
int64_t strideH;
int64_t strideW;
int64_t kernel_sizeH;
int64_t kernel_sizeW;
set_kernel_params(isizeH, isizeW,
osizeH, osizeW,
strideH, strideW,
kernel_sizeH, kernel_sizeW);
auto outputs = at::max_pool2d_with_indices(input,
IntArrayRef({kernel_sizeH, kernel_sizeW}),
IntArrayRef({strideH, strideW}),
IntArrayRef({0, 0}),
IntArrayRef({1, 1}),
false);
output.copy_(std::get<0>(outputs));
indices.copy_(std::get<1>(outputs));
}
TORCH_IMPL_FUNC(adaptive_max_pool2d_backward_out_mps)
(const Tensor& gradOutput,
const Tensor& input,
const Tensor& indices,
const Tensor& gradInput) {
int64_t isizeH = input.size(-2);
int64_t isizeW = input.size(-1);
int64_t osizeH = gradOutput.size(-2);
int64_t osizeW = gradOutput.size(-1);
int64_t strideH, strideW, kernel_sizeH, kernel_sizeW;
set_kernel_params(isizeH, isizeW,
osizeH, osizeW,
strideH, strideW,
kernel_sizeH, kernel_sizeW);
auto returnGradInput = at::max_pool2d_with_indices_backward(gradOutput,
input,
IntArrayRef({kernel_sizeH, kernel_sizeW}),
IntArrayRef({strideH, strideW}),
IntArrayRef({0, 0}),
IntArrayRef({1, 1}),
false,
indices);
gradInput.copy_(returnGradInput);
}
}
}

View File

@@ -7,6 +7,7 @@
#include <ATen/native/mps/OperationUtils.h>
#include <torch/library.h>
#include <c10/util/Optional.h>
#include <ATen/native/BinaryOps.h>
namespace at {
namespace native {
@@ -15,27 +16,46 @@ namespace mps {
struct BinaryOpCachedGraph : public MPSCachedGraph
{
BinaryOpCachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *primaryTensor = nil, *secondaryTensor = nil, *outputTensor = nil;
MPSGraphTensor *primaryTensor = nil, *secondaryTensor = nil;
MPSGraphTensor *alphaTensor = nil, *outputTensor = nil;
};
typedef MPSGraphTensor* (^BinaryOpBlock)(MPSGraph*, MPSGraphTensor*, MPSGraphTensor*);
#define BinaryOpFn() MPSGraphTensor* (MPSGraph* mpsGraph, MPSGraphTensor* primary, MPSGraphTensor* secondary)
typedef MPSGraphTensor* (^BinaryOpBlock)(BinaryOpCachedGraph*, MPSGraphTensor*, MPSGraphTensor*);
#define BinaryOpFn(graph, primary, secondary) MPSGraphTensor* (mps::BinaryOpCachedGraph* graph, MPSGraphTensor* primary, MPSGraphTensor* secondary)
void binaryOpTensor(const Tensor& self_t, const Tensor& other_t, const Tensor& output, std::string op_name, BinaryOpBlock binaryBlock)
// alpha is always 1.0 except when this function is called from add_sub_template()
void binaryOpTensor(const Tensor& self, const Tensor& other, const Scalar& alpha,
const Tensor& output_, std::string op_name, BinaryOpBlock binaryBlock)
{
// it's possible to receive empty tensors here
if (self_t.numel() == 0 || other_t.numel() == 0) {
if (self.numel() == 0 || other.numel() == 0) {
return;
}
MPSStream* mpsStream = getCurrentMPSStream();
const bool is_self_scalar = self_t.dim() == 0;
const bool is_other_scalar = other_t.dim() == 0;
Tensor self = is_self_scalar ? self_t : self_t.contiguous(at::MemoryFormat::Contiguous);
Tensor other = is_other_scalar ? other_t : other_t.contiguous(at::MemoryFormat::Contiguous);
const bool is_self_scalar = self.dim() == 0;
const bool is_other_scalar = other.dim() == 0;
auto new_size = at::infer_size(self.sizes(), other.sizes());
if (!output_.sizes().equals(new_size)) {
output_.resize_(new_size);
}
Tensor output = output_;
bool needsCopyToOutput = false;
if (!output_.is_contiguous()) {
output = output_.contiguous();
needsCopyToOutput = true;
// else, determine if this is an in-place operation on a view output
} else if (output_.is_view() && (self.is_alias_of(output_) || other.is_alias_of(output_))) {
output = at::native::empty_mps(output_.sizes(), output_.scalar_type(), c10::nullopt, kMPS);
needsCopyToOutput = true;
}
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
string key = op_name + getTensorsStringKey({self, other});
string key = op_name + getTensorsStringKey({self, other, output_}, /*use_scalar_value*/ false);
BinaryOpCachedGraph* cachedGraph = static_cast<BinaryOpCachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
@@ -44,17 +64,26 @@ void binaryOpTensor(const Tensor& self_t, const Tensor& other_t, const
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new BinaryOpCachedGraph(mpsGraph);
newCachedGraph->primaryTensor = !is_self_scalar ? mpsGraphRankedPlaceHolder(mpsGraph, self) :
mpsGraphConstantPlaceHolder(mpsGraph, getMPSScalarValue(self), getMPSShape(other),
// if other is scalar too, then use self's data type here and let the other
// have the same data type as self in the secondaryTensor
getMPSDataType((!is_other_scalar ? other : self).scalar_type()));
newCachedGraph->primaryTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
newCachedGraph->secondaryTensor = mpsGraphRankedPlaceHolder(mpsGraph, other);
newCachedGraph->secondaryTensor = !is_other_scalar ? mpsGraphRankedPlaceHolder(mpsGraph, other) :
mpsGraphConstantPlaceHolder(mpsGraph, getMPSScalarValue(other), getMPSShape(self),
// regardless of self's data type, the secondaryTensor's type must match it.
getMPSDataType(self.scalar_type()));
newCachedGraph->outputTensor = binaryBlock(mpsGraph, newCachedGraph->primaryTensor, newCachedGraph->secondaryTensor);
MPSGraphTensor* primaryCastTensor = newCachedGraph->primaryTensor;
MPSGraphTensor* secondaryCastTensor = newCachedGraph->secondaryTensor;
// this type inference is only required at the time of graph creation
const ScalarType common_dtype = c10::promoteTypes(self.scalar_type(), other.scalar_type());
if (self.scalar_type() != common_dtype) {
primaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->primaryTensor, common_dtype);
}
if (other.scalar_type() != common_dtype) {
secondaryCastTensor = castMPSTensor(mpsGraph, newCachedGraph->secondaryTensor, common_dtype);
}
newCachedGraph->outputTensor = binaryBlock(newCachedGraph, primaryCastTensor, secondaryCastTensor);
// Cast output tensor to an expected type if needed, which addresses discrepancy when int64 scalar is added to int32 tensor
// Output tensor should have been promoted but it remains an int32 tensor
if (output_.scalar_type() != common_dtype) {
newCachedGraph->outputTensor = castMPSTensor(mpsGraph, newCachedGraph->outputTensor, output_.scalar_type());
}
}
return newCachedGraph;
});
@@ -62,35 +91,52 @@ void binaryOpTensor(const Tensor& self_t, const Tensor& other_t, const
}
NSMutableDictionary *feeds = [[NSMutableDictionary new] autorelease];
if (!is_self_scalar) {
Placeholder selfPlaceholder = Placeholder(cachedGraph->primaryTensor, self);
Placeholder selfPlaceholder;
Placeholder otherPlaceholder;
if (is_self_scalar) {
feeds[cachedGraph->primaryTensor] = getMPSGraphTensorFromScalar(mpsStream, self.item(), getMPSScalarType(self.scalar_type()));
} else {
selfPlaceholder = Placeholder(cachedGraph->primaryTensor, self);
feeds[selfPlaceholder.getMPSGraphTensor()] = selfPlaceholder.getMPSGraphTensorData();
}
if (!is_other_scalar) {
Placeholder otherPlaceholder = Placeholder(cachedGraph->secondaryTensor, other);
if (is_other_scalar) {
feeds[cachedGraph->secondaryTensor] = getMPSGraphTensorFromScalar(mpsStream, other.item(), getMPSScalarType(other.scalar_type()));
} else {
otherPlaceholder = Placeholder(cachedGraph->secondaryTensor, other);
feeds[otherPlaceholder.getMPSGraphTensor()] = otherPlaceholder.getMPSGraphTensorData();
}
// 'cachedGraph->alphaTensor' is not nil only if add_sub_template() was called with an alpha value != 1.0
if (cachedGraph->alphaTensor) {
feeds[cachedGraph->alphaTensor] = getMPSGraphTensorFromScalar(mpsStream, alpha, getMPSScalarType(other.scalar_type()));
}
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, output);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, needsCopyToOutput ? output : output_);
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(getCurrentMPSStream(), cachedGraph->graph(), feeds, results);
runMPSGraph(mpsStream, cachedGraph->graph(), feeds, results);
if (needsCopyToOutput) {
output_.copy_(output);
}
}
}
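The casts added above follow PyTorch's usual binary-op promotion: both operands are cast to `promoteTypes(self, other)` before the op runs, and the result is cast to the output's dtype only if it differs (the int64-scalar-plus-int32-tensor case called out in the comment). A toy illustration of why computing in the common dtype matters:

```
#include <cinttypes>
#include <cstdint>
#include <cstdio>

enum class DType { Int32, Int64 };

// tiny stand-in for c10::promoteTypes
DType promote(DType a, DType b) {
  return (a == DType::Int64 || b == DType::Int64) ? DType::Int64 : DType::Int32;
}

int main() {
  // an int64 scalar added to an int32 tensor element
  int32_t tensor_elem = 2'000'000'000;
  int64_t scalar = 2'000'000'000;
  DType common = promote(DType::Int32, DType::Int64);   // Int64
  // compute in the common dtype so the sum does not wrap
  int64_t sum = (int64_t)tensor_elem + scalar;          // 4e9, needs 64 bits
  // a kernel would cast back only if the output dtype differs from common
  std::printf("common=%s sum=%" PRId64 "\n",
              common == DType::Int64 ? "Int64" : "Int32", sum);
}
```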
void binaryOpScalar(const Tensor& self, const Scalar& other, const Tensor& output, std::string op_name, BinaryOpBlock binaryBlock)
void binaryOpScalar(const Tensor& self, const Scalar& other, const Scalar& alpha,
const Tensor& output, std::string op_name, BinaryOpBlock binaryBlock)
{
binaryOpTensor(self, wrapped_scalar_tensor(other), output, op_name, binaryBlock);
binaryOpTensor(self, wrapped_scalar_tensor(other), alpha, output, op_name, binaryBlock);
}
void div_mode_template(const Tensor& self, const Tensor& other,
c10::optional<c10::string_view> rounding_mode,
const Tensor& output, const string op_name)
{
BinaryOpBlock div_mode_op_block = ^BinaryOpFn() {
MPSGraphTensor* divTensor = [mpsGraph divisionWithPrimaryTensor:primary
secondaryTensor:secondary
BinaryOpBlock div_mode_op_block = ^BinaryOpFn(cachedGraph, primaryCastTensor, secondaryCastTensor) {
MPSGraph* mpsGraph = cachedGraph->graph();
MPSGraphTensor* divTensor = [mpsGraph divisionWithPrimaryTensor:primaryCastTensor
secondaryTensor:secondaryCastTensor
name:nil];
if (!rounding_mode.has_value()) {
return divTensor;
@@ -102,77 +148,104 @@ void div_mode_template(const Tensor& self, const Tensor& other,
assert(0 && "Invalid rounding mode\n");
return nullptr;
};
binaryOpTensor(self, other, output, op_name + "_out_mps:" + (rounding_mode.has_value() ? c10::str(*rounding_mode) : ""), div_mode_op_block);
binaryOpTensor(self, other, Scalar(1.0), output, op_name + "_out_mps:" + (rounding_mode.has_value() ? c10::str(*rounding_mode) : ""), div_mode_op_block);
}
void add_sub_template(const Tensor& self, const Tensor& other, const Scalar& alpha, const Tensor& output, std::string op_name)
{
BinaryOpBlock add_sub_op_block = ^BinaryOpFn() {
double alpha_val = alpha.toDouble();
MPSGraphTensor* secondaryTensor = secondary;
if (alpha.toDouble() == 0.0)
const_cast<Tensor&>(output) = self.clone();
const bool alpha_has_value = alpha.toDouble() != 1.0;
if (alpha_has_value) {
auto commonDtype = at::result_type(self, other);
at::native::alpha_check(commonDtype, alpha);
}
BinaryOpBlock add_sub_op_block = ^BinaryOpFn(cachedGraph, primaryCastTensor, secondaryCastTensor) {
MPSGraph* mpsGraph = cachedGraph->graph();
MPSGraphTensor* secondaryTensor = secondaryCastTensor;
// if alpha is 1.0, then we don't bother adding another multiply to graph
if (alpha_val != 1.0) {
MPSGraphTensor* alphaTensor = mpsGraphConstantPlaceHolder(mpsGraph, alpha_val, getMPSShape(other), getMPSDataType(other.scalar_type()));
secondaryTensor = [mpsGraph multiplicationWithPrimaryTensor:secondary
secondaryTensor:alphaTensor
if (alpha_has_value) {
cachedGraph->alphaTensor = mpsGraphRankedPlaceHolder(mpsGraph, getMPSScalarType(other.scalar_type()), @[@1]);
secondaryTensor = [mpsGraph multiplicationWithPrimaryTensor:secondaryCastTensor
secondaryTensor:cachedGraph->alphaTensor
name:nil];
}
if (op_name == "add")
return [mpsGraph additionWithPrimaryTensor:primary
return [mpsGraph additionWithPrimaryTensor:primaryCastTensor
secondaryTensor:secondaryTensor
name:nil];
else
return [mpsGraph subtractionWithPrimaryTensor:primary
return [mpsGraph subtractionWithPrimaryTensor:primaryCastTensor
secondaryTensor:secondaryTensor
name:nil];
};
binaryOpTensor(self, other, output, op_name + "_out_mps:" + std::to_string(alpha.toDouble()), add_sub_op_block);
// add alpha's type to the key only if multiply was added to graph
binaryOpTensor(self, other, alpha, output, op_name + "_out_mps:" + (alpha_has_value ? getMPSTypeString(alpha.type()) : ""), add_sub_op_block);
}
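`add_sub_template` computes out = self ± alpha * other, and only inserts the multiply node (and the `alphaTensor` placeholder) when alpha != 1.0, so the common case stays a single add/sub. A scalar sketch of that decision:

```
#include <cstdio>

double add_sub(double self, double other, double alpha, bool is_add) {
  // alpha == 1.0: no extra multiply is added to the graph
  double scaled = (alpha != 1.0) ? other * alpha : other;
  return is_add ? self + scaled : self - scaled;
}

int main() {
  std::printf("%f\n", add_sub(10.0, 3.0, 1.0, true));   // 13.000000
  std::printf("%f\n", add_sub(10.0, 3.0, 0.5, false));  // 8.500000
}
```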
} // namespace mps
#define CREATE_MPS_BINARY_OP_FUNC(func_out, func_stub, other_type) \
#define CREATE_MPS_BINARY_COMPARISON_OP_FUNC(func_out, func_stub, other_type) \
Tensor& func_out (const Tensor& self, const other_type& other, Tensor& output) { \
mps::binaryOp##other_type(self, other, Scalar(1.0), output, #func_stub, \
^BinaryOpFn(cachedGraph, primaryCastTensor, secondaryCastTensor) { \
MPSGraph* mpsGraph = cachedGraph->graph(); \
return [mpsGraph func_stub##WithPrimaryTensor:mps::castMPSTensor(mpsGraph, primaryCastTensor, ScalarType::Bool) \
secondaryTensor:mps::castMPSTensor(mpsGraph, secondaryCastTensor, ScalarType::Bool) \
name:nil]; }); \
return output; \
}
#define CREATE_MPS_STRUCTURED_BINARY_OP_FUNC(func_out, func_stub, other_type) \
TORCH_IMPL_FUNC(func_out) (const Tensor& self, const other_type& other, const Tensor& output) { \
mps::binaryOp##other_type(self, other, output, #func_stub, \
^BinaryOpFn() { \
return [mpsGraph func_stub##WithPrimaryTensor:primary \
secondaryTensor:secondary \
mps::binaryOp##other_type(self, other, Scalar(1.0), output, #func_stub, \
^BinaryOpFn(cachedGraph, primaryCastTensor, secondaryCastTensor) { \
MPSGraph* mpsGraph = cachedGraph->graph(); \
return [mpsGraph func_stub##WithPrimaryTensor:primaryCastTensor \
secondaryTensor:secondaryCastTensor \
name:nil]; }); \
}
// Boolean Ops require casting output to "MPSDataTypeBool"
#define CREATE_MPS_BOOLEAN_OP_FUNC(func_out, func_stub, other_type) \
#define CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(func_out, func_stub, other_type) \
TORCH_IMPL_FUNC(func_out) (const Tensor& self, const other_type& other, const Tensor& output) { \
mps::binaryOp##other_type(self, other, output, #func_stub, \
^BinaryOpFn() { \
MPSGraphTensor* outputTensor = [mpsGraph func_stub##WithPrimaryTensor:primary \
secondaryTensor:secondary \
mps::binaryOp##other_type(self, other, Scalar(1.0), output, #func_stub, \
^BinaryOpFn(cachedGraph, primaryCastTensor, secondaryCastTensor) { \
MPSGraph* mpsGraph = cachedGraph->graph(); \
MPSGraphTensor* outputTensor = [mpsGraph func_stub##WithPrimaryTensor:primaryCastTensor \
secondaryTensor:secondaryCastTensor \
name:nil]; \
return [mpsGraph castTensor:outputTensor toType:MPSDataTypeBool name:@"boolOut"]; }); \
return mps::castMPSTensor(mpsGraph, outputTensor, ScalarType::Bool); }); \
}
// Boolean Binary Ops
CREATE_MPS_BOOLEAN_OP_FUNC(eq_scalar_out_mps, equal, Scalar);
CREATE_MPS_BOOLEAN_OP_FUNC(eq_tensor_out_mps, equal, Tensor);
CREATE_MPS_BOOLEAN_OP_FUNC(ne_scalar_out_mps, notEqual, Scalar);
CREATE_MPS_BOOLEAN_OP_FUNC(ne_tensor_out_mps, notEqual, Tensor);
CREATE_MPS_BOOLEAN_OP_FUNC(le_scalar_out_mps, lessThanOrEqualTo, Scalar);
CREATE_MPS_BOOLEAN_OP_FUNC(le_tensor_out_mps, lessThanOrEqualTo, Tensor);
CREATE_MPS_BOOLEAN_OP_FUNC(lt_scalar_out_mps, lessThan, Scalar);
CREATE_MPS_BOOLEAN_OP_FUNC(lt_tensor_out_mps, lessThan, Tensor);
CREATE_MPS_BOOLEAN_OP_FUNC(ge_scalar_out_mps, greaterThanOrEqualTo, Scalar);
CREATE_MPS_BOOLEAN_OP_FUNC(ge_tensor_out_mps, greaterThanOrEqualTo, Tensor);
CREATE_MPS_BOOLEAN_OP_FUNC(gt_scalar_out_mps, greaterThan, Scalar);
CREATE_MPS_BOOLEAN_OP_FUNC(gt_tensor_out_mps, greaterThan, Tensor);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(eq_scalar_out_mps, equal, Scalar);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(eq_tensor_out_mps, equal, Tensor);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(ne_scalar_out_mps, notEqual, Scalar);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(ne_tensor_out_mps, notEqual, Tensor);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(le_scalar_out_mps, lessThanOrEqualTo, Scalar);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(le_tensor_out_mps, lessThanOrEqualTo, Tensor);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(lt_scalar_out_mps, lessThan, Scalar);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(lt_tensor_out_mps, lessThan, Tensor);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(ge_scalar_out_mps, greaterThanOrEqualTo, Scalar);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(ge_tensor_out_mps, greaterThanOrEqualTo, Tensor);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(gt_scalar_out_mps, greaterThan, Scalar);
CREATE_MPS_STRUCTURED_BOOLEAN_OP_FUNC(gt_tensor_out_mps, greaterThan, Tensor);
// Arithmetic Binary Ops
CREATE_MPS_BINARY_OP_FUNC(minimum_out_mps, minimum, Tensor);
CREATE_MPS_BINARY_OP_FUNC(maximum_out_mps, maximum, Tensor);
CREATE_MPS_BINARY_OP_FUNC(mul_out_mps, multiplication, Tensor);
CREATE_MPS_BINARY_OP_FUNC(pow_tensor_scalar_out_mps, power, Scalar);
CREATE_MPS_BINARY_OP_FUNC(pow_tensor_tensor_out_mps, power, Tensor);
CREATE_MPS_BINARY_OP_FUNC(atan2_mps_out, atan2, Tensor);
CREATE_MPS_STRUCTURED_BINARY_OP_FUNC(minimum_out_mps, minimum, Tensor);
CREATE_MPS_STRUCTURED_BINARY_OP_FUNC(maximum_out_mps, maximum, Tensor);
CREATE_MPS_STRUCTURED_BINARY_OP_FUNC(mul_out_mps, multiplication, Tensor);
CREATE_MPS_STRUCTURED_BINARY_OP_FUNC(pow_tensor_scalar_out_mps, power, Scalar);
CREATE_MPS_STRUCTURED_BINARY_OP_FUNC(pow_tensor_tensor_out_mps, power, Tensor);
CREATE_MPS_STRUCTURED_BINARY_OP_FUNC(atan2_mps_out, atan2, Tensor);
CREATE_MPS_BINARY_COMPARISON_OP_FUNC(logical_and_out_mps, logicalAND, Tensor);
CREATE_MPS_BINARY_COMPARISON_OP_FUNC(logical_or_out_mps, logicalOR, Tensor);
CREATE_MPS_BINARY_COMPARISON_OP_FUNC(logical_xor_out_mps, logicalXOR, Tensor);
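Each `CREATE_MPS_*` line above stamps out one wrapper whose body differs only in the MPSGraph selector it invokes. A simplified standalone analogue of the macro pattern (real signatures and graph plumbing omitted):

```
#include <cstdio>

// simplified analogue of CREATE_MPS_STRUCTURED_BINARY_OP_FUNC
#define CREATE_BINARY_OP(func_name, op)                  \
  double func_name(double a, double b) { return a op b; }

CREATE_BINARY_OP(mul_out, *)
CREATE_BINARY_OP(add_out, +)

int main() {
  std::printf("%f %f\n", mul_out(3, 4), add_out(3, 4));  // 12.0 7.0
}
```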
TORCH_IMPL_FUNC(div_out_mode_mps) (const Tensor& self, const Tensor& other, c10::optional<c10::string_view> rounding_mode, const Tensor& output) {

View File

@@ -132,7 +132,7 @@ Tensor _mps_convolution(
MPSGraph* mpsGraph = native_mps::make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphConvolution2DOpDescriptor *descriptor_ = [MPSGraphConvolution2DOpDescriptor new];
MPSGraphConvolution2DOpDescriptor *descriptor_ = [[MPSGraphConvolution2DOpDescriptor new] autorelease];
fill_conv_desc(descriptor_, stride[0], stride[1],
dilation[0], dilation[1],
padding[1], padding[0],
@@ -173,7 +173,7 @@ Tensor _mps_convolution(
biasPlaceholder = native_mps::Placeholder(cachedGraph->biasTensor_, (bias_opt.value()).view({1, bias_shape[0], 1, 1}));
auto outputPlaceholder = native_mps::Placeholder(cachedGraph->outputTensor_, *output);
NSMutableDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = [[NSMutableDictionary alloc] initWithCapacity: 3];
NSMutableDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = [[[NSMutableDictionary alloc] initWithCapacity: 3] autorelease];
feeds[inputPlaceholder.getMPSGraphTensor()] = inputPlaceholder.getMPSGraphTensorData();
feeds[weightsPlaceholder.getMPSGraphTensor()] = weightsPlaceholder.getMPSGraphTensorData();
if(bias_defined) {
@@ -262,7 +262,7 @@ Tensor mps_convolution_backward_input(
MPSGraph* mpsGraph = native_mps::make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphConvolution2DOpDescriptor *descriptor_ = [MPSGraphConvolution2DOpDescriptor new];
MPSGraphConvolution2DOpDescriptor *descriptor_ = [[MPSGraphConvolution2DOpDescriptor new] autorelease];
fill_conv_desc(descriptor_, stride[0], stride[1],
dilation[0], dilation[1],
padding[1], padding[0],
@@ -373,7 +373,7 @@ Tensor mps_convolution_backward_weights(
MPSGraph* mpsGraph = native_mps::make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphConvolution2DOpDescriptor *descriptor_ = [MPSGraphConvolution2DOpDescriptor new];
MPSGraphConvolution2DOpDescriptor *descriptor_ = [[MPSGraphConvolution2DOpDescriptor new] autorelease];
fill_conv_desc(descriptor_, stride[0], stride[1],
dilation[0], dilation[1],
padding[1], padding[0],

View File

@@ -12,185 +12,9 @@
#include <ATen/native/Resize.h>
#include <c10/util/Optional.h>
namespace at {
namespace native {
MPSGraphTensor* chainViewOperation(MPSGraph* mpsGraph, IntArrayRef size,
IntArrayRef stride, int64_t storage_offset,
MPSGraphTensor* inputTensor, const Tensor& self) {
MPSGraphTensor *outputTensor = nil;
const size_t shape_size = size.size();
@autoreleasepool {
int32_t* sizeArray = new int32_t[shape_size];
const int64_t int_max = std::numeric_limits<int32_t>::max();
for (int i = 0; i < shape_size; i++) {
TORCH_CHECK(size[i] <= int_max);
sizeArray[i] = static_cast<int32_t>(size[i]);
}
NSData* shapeData = [NSData dataWithBytes:sizeArray
length:shape_size * sizeof(int32_t)];
MPSGraphTensor* shapeTensor = [mpsGraph constantWithData:shapeData
shape:@[[NSNumber numberWithUnsignedInteger: shape_size]]
dataType:MPSDataTypeInt32];
delete[] sizeArray;
MPSGraphTensor* storageOffsetTensor = [mpsGraph constantWithScalar:storage_offset
dataType:MPSDataTypeInt32];
MPSGraphTensor* strideTensor = [mpsGraph constantWithScalar:stride[shape_size - 1]
dataType:MPSDataTypeInt32];
MPSGraphTensor* rangeTensor = [mpsGraph coordinateAlongAxis:-1
withShapeTensor:shapeTensor
name:nil];
MPSGraphTensor* indexTensor = [mpsGraph multiplicationWithPrimaryTensor:rangeTensor
secondaryTensor:strideTensor
name:nil];
MPSGraphTensor* indicesTensor = indexTensor;
// create stride Tensors for each rank of the input tensor
for (int i = 1; i < shape_size; i++) {
strideTensor = [mpsGraph constantWithScalar:stride[shape_size - i - 1]
dataType:MPSDataTypeInt32];
MPSGraphTensor* rangeTensor = [mpsGraph coordinateAlongAxis:(-i - 1)
withShapeTensor:shapeTensor
name:nil];
MPSGraphTensor* indexTensor = [mpsGraph multiplicationWithPrimaryTensor:rangeTensor
secondaryTensor:strideTensor
name:nil];
indicesTensor = [mpsGraph additionWithPrimaryTensor:indexTensor
secondaryTensor:indicesTensor
name:nil];
}
indicesTensor = [mpsGraph additionWithPrimaryTensor:indicesTensor
secondaryTensor:storageOffsetTensor
name:nil];
MPSGraphTensor *reshapedInputTensor = [mpsGraph reshapeTensor:inputTensor
withShape:@[@-1]
name:nil];
MPSGraphTensor *reshapedIndicesTensor = [mpsGraph reshapeTensor:indicesTensor
withShape:@[@-1]
name:nil];
// Call gather to coalesce the needed values. Result will be of same shape as flattened indices tensor
MPSGraphTensor *gatheredTensor = [mpsGraph gatherWithUpdatesTensor:reshapedInputTensor
indicesTensor:reshapedIndicesTensor
axis:0
batchDimensions:0
name:nil];
// Reshape the data to desired size
outputTensor = [mpsGraph reshapeTensor:gatheredTensor
withShapeTensor:shapeTensor
name:nil];
}
return outputTensor;
}
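The removed `chainViewOperation` materialized a strided view by computing, for every output coordinate, the flat input index storage_offset + sum_i(idx_i * stride_i) and then gathering from the flattened input. A CPU sketch of the same index arithmetic (hypothetical helper, not the graph code):

```
#include <cstdio>
#include <vector>

// flat indices that an as_strided(size, stride, offset) view reads
std::vector<long> strided_indices(const std::vector<long>& size,
                                  const std::vector<long>& stride,
                                  long offset) {
  std::vector<long> out;
  long n = 1;
  for (long s : size) n *= s;
  for (long linear = 0; linear < n; ++linear) {
    long rem = linear, flat = offset;
    for (long d = (long)size.size() - 1; d >= 0; --d) {
      flat += (rem % size[d]) * stride[d];  // idx_d * stride_d
      rem /= size[d];
    }
    out.push_back(flat);
  }
  return out;
}

int main() {
  // transpose of a contiguous 2x3 buffer: size {3,2}, stride {1,3}
  for (long i : strided_indices({3, 2}, {1, 3}, 0)) std::printf("%ld ", i);
  std::printf("\n");  // 0 3 1 4 2 5
}
```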
// There are few cases we need to consider:
// Here nodes are the Tensors and the edges are the operations performed on the
// Tensor. As a result of the operation performed we can have result as View
// Tensor (View T) or a Non view tensor (NonView T). The difference is if its
// mapped by the same underlying storage ptr or a new MTLBuffer was allocated.
// T = Tensor
// ----------
// | Orig T |
// ----------
// / | \
// View T View T NonView T
// / / \ |
// View T / \ |
// | / \ |
// | / \ |
// | / \ |
// NonView T NonView T
Tensor as_strided_tensorimpl_mps(const Tensor& self, IntArrayRef size,
IntArrayRef stride,
optional<int64_t> storage_offset_) {
using namespace mps;
// Use the size and stride to create a unique key
auto result = detail::make_tensor<TensorImpl>(
c10::TensorImpl::VIEW, Storage(self.storage()), self.key_set(), self.dtype());
auto storage_offset = storage_offset_.value_or(self.storage_offset());
setStrided(result, size, stride, storage_offset);
// 0 sizes won't result in any change in the shape of the Tensor so we can
// skip it. Also if the memory is contiguous we don't need to do
// gather-scatter operations using graph.
if (size.size() > 0 && (!result.is_contiguous())) {
// If self itself was a view tensor, that means we need to chain the graphs
// else we will create a new entry in the cache
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* inputTensor_ = nil;
MPSGraphTensor* outputTensor_ = nil;
IntArrayRef size_;
IntArrayRef stride_;
int64_t storage_offset_;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
string lookup_key = mps::getStridedKey(self, self.sizes(), self.strides(),
self.storage_offset());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(lookup_key));
if(!cachedGraph) {
string insert_key = mps::getStridedKey(self,size, stride, storage_offset);
CachedGraph* insertCachedGraph = static_cast<CachedGraph *>(cache_->LookUp(insert_key));
if (!insertCachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(insert_key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
// Self is the input tensor we are creating view of
MPSGraphTensor* inputTensor = [mpsGraph placeholderWithShape : getMPSShape(self)
dataType : getMPSDataType(self.scalar_type())
name : nil];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = chainViewOperation(mpsGraph, size,
stride,
storage_offset,
inputTensor,
self);
newCachedGraph->size_ = size;
newCachedGraph->stride_ = stride;
newCachedGraph->storage_offset_ = storage_offset;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
} else {
// The else branch takes care of the chaining, where multiple view operations
// were applied on top of the same underlying data storage ptr
string insert_key = mps::getStridedKey(self, size, stride, storage_offset);
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(insert_key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = cachedGraph->graph();
newCachedGraph = new CachedGraph(mpsGraph);
newCachedGraph->inputTensor_ = cachedGraph->inputTensor_;
newCachedGraph->outputTensor_ = chainViewOperation(mpsGraph, size,
stride,
storage_offset,
cachedGraph->outputTensor_,
self);
newCachedGraph->size_ = size;
newCachedGraph->stride_ = stride;
newCachedGraph->storage_offset_ = storage_offset;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
}
}
return result;
}
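From the Python side, any non-contiguous view on MPS populates this cache, and a view taken of another view extends the existing entry rather than starting from the base tensor. A small repro, assuming an MPS-enabled PyTorch build:

import torch

x = torch.arange(12., device="mps").reshape(3, 4)
v1 = x.t()       # view: entry keyed on (size, stride, storage offset)
v2 = v1[1:]      # view of a view: chains onto v1's cached graph
print(v2.cpu())  # materializing off-device runs the chained gather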
namespace mps {
void* pageAlignedBlockPtr(
@@ -209,23 +33,64 @@ void* pageAlignedBlockPtr(
return (void*)alignedAddress;
}
static bool copy_requires_temporaries(const Tensor& dst, const Tensor& src) {
bool same_dtype = src.dtype() == dst.dtype();
if (same_dtype && src.is_contiguous() && dst.is_contiguous()) {
return false;
} else {
return true;
}
}

// Copy sourceBuffer into destBuffer, casting sourceBuffer to src.scalar_type().
// The shapes and dtypes are taken from dst and src, but their storage pointers are not used.
void copy_cast_mps(at::Tensor& dst, const at::Tensor& src,
id<MTLBuffer> destBuffer, id<MTLBuffer> sourceBuffer) {
using namespace mps;
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* inputTensor_ = nil;
MPSGraphTensor* outputTensor_ = nil;
};
MPSStream* stream = getCurrentMPSStream();
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSDataType dstDType = getMPSDataType(dst.scalar_type());
MPSDataType srcDType = getMPSDataType(src.scalar_type());
MPSShape* dstShape = getMPSShape(dst);
MPSShape* srcShape = getMPSShape(src);
@autoreleasepool {
string key = "copy_cast_mps" + getTensorsStringKey({src, dst});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if (!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, src);
MPSGraphTensor* outputTensor = [mpsGraph castTensor:inputTensor toType:dstDType name:@"cast"];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
MPSGraphTensorData* srcData = [[[MPSGraphTensorData alloc]
initWithMTLBuffer:sourceBuffer shape:srcShape dataType:srcDType]
autorelease];
MPSGraphTensorData* dstData = [[[MPSGraphTensorData alloc]
initWithMTLBuffer:destBuffer shape:dstShape dataType:dstDType]
autorelease];
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{cachedGraph->inputTensor_: srcData};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{cachedGraph->outputTensor_: dstData};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
}
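A dtype-changing transfer off the MPS device is what routes through this helper; a minimal repro, assuming an MPS-enabled build:

import torch

src = torch.ones(4, device="mps", dtype=torch.float32)
dst = torch.empty(4, dtype=torch.float16)  # CPU tensor, different dtype
dst.copy_(src)  # cast runs on the MPS side before the blit to host memory
print(dst)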
static at::Tensor& copy_from_mps_(at::Tensor& dst_, const at::Tensor& src_,
bool non_blocking) {
using namespace mps;
static at::Tensor& copy_from_mps_(at::Tensor& dst_, const at::Tensor& src_, bool non_blocking)
{
id<MTLDevice> device = MPSDevice::getInstance()->device();
MPSStream* stream = getCurrentMPSStream();
uint64_t size = src_.nbytes();
if (size == 0) return dst_;
Tensor dst;
Tensor src;
if (!dst_.is_contiguous()) {
@@ -233,43 +98,41 @@ static at::Tensor& copy_from_mps_(at::Tensor& dst_, const at::Tensor& src_,
} else {
dst = dst_;
}
dst._set_conj(dst_.is_conj());
src._set_conj(src_.is_conj());
dst._set_neg(dst_.is_neg());
src._set_neg(src_.is_neg());
auto storage_byte_offset = src_.storage_offset() * src_.itemsize();
id<MTLBuffer> sourceBuffer = __builtin_bit_cast(id<MTLBuffer>, src_.storage().data());
if (!src_.is_contiguous()) {
id<MTLBuffer> gatherTensor = gatherViewTensor(src_, sourceBuffer);
if (gatherTensor) {
sourceBuffer = gatherTensor;
Tensor emptyShell = Tensor();
src = gatherViewTensor(src_, emptyShell);
if (src.has_storage()) {
storage_byte_offset = 0;
} else {
src = src_.expand_as(dst).contiguous();
sourceBuffer = __builtin_bit_cast(id<MTLBuffer>, src.storage().data());
storage_byte_offset = src.storage_offset() * src.itemsize();
}
} else {
src = src_;
}
id<MTLBuffer> sourceBuffer = getMTLBufferStorage(src);
const size_t src_size = src.nbytes();
// if there's anything wrong with source, we shouldn't return dst_ silently and must error out.
TORCH_CHECK(sourceBuffer && src_size > 0);
void* host_dst = dst.storage().data();
if (sourceBuffer == nil) return dst_;
NSUInteger destOffset = dst.storage_offset() * dst.itemsize();
// In case of dtype change, first convert src inplace
if (src_.dtype() != dst_.dtype()) {
copy_cast_mps(dst, src, sourceBuffer, sourceBuffer);
}
@autoreleasepool {
MTLResourceOptions options = MTLResourceOptionCPUCacheModeDefault | MTLResourceStorageModeShared;
NSUInteger alignedLength = 0;
void* alignedPtr = pageAlignedBlockPtr(host_dst, (NSUInteger)size, &alignedLength);
void* host_dst = dst.storage().data();
void* alignedPtr = pageAlignedBlockPtr(host_dst, (NSUInteger)src_size, &alignedLength);
id<MTLBuffer> destBuffer = [device newBufferWithBytesNoCopy:alignedPtr
length:alignedLength
options:options
deallocator:nil];
destOffset = uintptr_t(host_dst) - uintptr_t(alignedPtr);
NSUInteger destOffset = uintptr_t(host_dst) - uintptr_t(alignedPtr);
// 4 bytes alignment required on macos for blits.
TORCH_CHECK(destOffset % 4 == 0, "Unaligned blit request");
@@ -283,7 +146,7 @@ static at::Tensor& copy_from_mps_(at::Tensor& dst_, const at::Tensor& src_,
sourceOffset:(NSUInteger)storage_byte_offset
toBuffer:destBuffer
destinationOffset:(NSUInteger)destOffset
size:(NSUInteger)size];
size:(NSUInteger)src_size];
[blitEncoder endEncoding];
if (non_blocking) {
@@ -302,26 +165,26 @@ static at::Tensor& copy_from_mps_(at::Tensor& dst_, const at::Tensor& src_,
return dst_;
}
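A non-contiguous MPS source takes the gatherViewTensor branch above: the values are first coalesced on-GPU, and a single contiguous blit then moves them into page-aligned host memory. For example, assuming an MPS-enabled build:

import torch

x = torch.randn(3, 4, device="mps")
y = x.t().cpu()  # transpose is a view: gather on MPS, then one blit
assert torch.equal(y, x.cpu().t())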
static at::Tensor& copy_to_mps_(at::Tensor& dst_, const at::Tensor& src_,
bool non_blocking) {
static at::Tensor& copy_to_mps_(at::Tensor& dst_, const at::Tensor& src_, bool non_blocking)
{
MPSStream* stream = getCurrentMPSStream();
Tensor dst;
Tensor src;
id<MTLDevice> device = MPSDevice::getInstance()->device();
auto dst_byte_offset = dst_.storage_offset() * dst_.itemsize();
id<MTLBuffer> destBuffer = __builtin_bit_cast(id<MTLBuffer>, dst_.storage().data());
id<MTLBuffer> destBuffer = getMTLBufferStorage(dst_);
if (!src.is_contiguous()) {
if (src_.is_view()) {
src = src_.to(dst_.dtype()).expand_as(dst_).contiguous();
} else {
src = src_;
if (src.dtype() != dst_.dtype()) {
// In case of dtype change, perform conversion on source device
src = src.to(dst_.dtype());
}
}
if (!dst_.is_contiguous()) {
TORCH_WARN("The dst MTL buffer in copy_to_mps is non-contiguous");
}
const void* host_src = src.storage().data();
uint64_t size = src.nbytes();
@@ -336,6 +199,8 @@ static at::Tensor& copy_to_mps_(at::Tensor& dst_, const at::Tensor& src_,
options:options
deallocator:nil];
sourceOffset = uintptr_t(host_src) - uintptr_t(alignedPtr);
if (src_.is_view() || !src_.is_contiguous())
sourceOffset += src_.storage_offset() * src_.itemsize();
dispatch_sync(stream->queue(), ^() {
@autoreleasepool {
@@ -383,56 +248,65 @@ void copy_blit_mps(void* dst, const void* src, size_t size) {
});
}
static at::Tensor& copy_kernel_mps(at::Tensor& dst_, const at::Tensor& src_,
bool non_blocking) {
MPSStream* stream = getCurrentMPSStream();
uint64_t size = src_.nbytes();
static at::Tensor& copy_kernel_mps(at::Tensor& dst_, const at::Tensor& src_, bool non_blocking)
{
auto src_byte_offset = src_.storage_offset() * src_.itemsize();
id<MTLBuffer> sourceBuffer = __builtin_bit_cast(id<MTLBuffer>, src_.storage().data());
auto dst_byte_offset = dst_.storage_offset() * dst_.itemsize();
// If dst is contiguous and there is no byte offset, we can store the result of the
// gather directly into dst. This avoids the overhead of an additional blit in most cases.
bool returnGatherOutput = (dst_.is_contiguous() && !dst_byte_offset);
Tensor src;
if (!src_.is_contiguous()) {
id<MTLBuffer> gatherTensor = gatherViewTensor(src_, sourceBuffer);
if (gatherTensor) {
sourceBuffer = gatherTensor;
Tensor emptyShell = Tensor();
src = gatherViewTensor(src_, returnGatherOutput ? dst_ : emptyShell);
if (src.has_storage()) {
if (returnGatherOutput)
return dst_;
src_byte_offset = 0;
} else {
src = src_.expand_as(dst_).contiguous();
sourceBuffer = __builtin_bit_cast(id<MTLBuffer>, src.storage().data());
src_byte_offset = src.storage_offset() * src.itemsize();
}
} else {
src = src_;
}
Tensor dst = dst_;
dst._set_conj(dst_.is_conj());
// Scatter to `dst` if the memory is not contiguous
// If the memory is not contiguous, it means that the tensor has strides and we would not be
// able to do the copy using a single blit
if (!dst_.is_contiguous()) {
return scatterViewTensor(src, dst_);
}
src._set_conj(src_.is_conj());
dst._set_neg(dst_.is_neg());
src._set_neg(src_.is_neg());
auto dst_byte_offset = dst.storage_offset() * dst.itemsize();
id<MTLBuffer> destBuffer = __builtin_bit_cast(id<MTLBuffer>, dst.storage().data());
id<MTLBuffer> destBuffer = getMTLBufferStorage(dst_);
id<MTLBuffer> sourceBuffer = getMTLBufferStorage(src);
const size_t src_size = src.nbytes();
dispatch_sync(stream->queue(), ^() {
@autoreleasepool {
id<MTLCommandBuffer> commandBuffer = stream->commandBuffer();
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
[blitEncoder copyFromBuffer:sourceBuffer
sourceOffset:src_byte_offset
toBuffer:destBuffer
destinationOffset:dst_byte_offset
size:size];
[blitEncoder endEncoding];
if (non_blocking) {
if (src.dtype() == dst_.dtype()) {
MPSStream* stream = getCurrentMPSStream();
dispatch_sync(stream->queue(), ^() {
@autoreleasepool {
id<MTLCommandBuffer> commandBuffer = stream->commandBuffer();
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer blitCommandEncoder];
[blitEncoder copyFromBuffer:sourceBuffer
sourceOffset:src_byte_offset
toBuffer:destBuffer
destinationOffset:dst_byte_offset
size:src_size];
[blitEncoder endEncoding];
// GPU to GPU copy needs flushing only, and no synchronization with CPU is necessary
stream->commit(true);
} else {
stream->commitAndWait();
}
}
});
return dst;
});
} else {
copy_cast_mps(dst_, src, destBuffer, sourceBuffer);
}
return dst_;
}
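Device-local copies therefore dispatch three ways: same-dtype contiguous copies reduce to one blit, view sources are gathered first (or scattered into a strided destination), and dtype changes go through copy_cast_mps. A sketch of the three call shapes, assuming an MPS-enabled build (strided-view support on MPS may vary by version):

import torch

a = torch.randn(8, device="mps")
d = torch.empty(8, device="mps")
d.copy_(a)                # contiguous, same dtype -> single blit
b = torch.empty(8, device="mps", dtype=torch.float16)
b.copy_(a)                # dtype change -> copy_cast_mps
c = torch.empty(4, device="mps")
c.copy_(a[::2])           # strided source -> gather, then blit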
at::Tensor& mps_copy_(at::Tensor& dst, const at::Tensor& src, bool non_blocking)


@@ -127,6 +127,62 @@ Tensor& normal_mps_(Tensor& self, double mean, double std, c10::optional<Generat
return normal_mps_out(mean_t, std_t, gen, self);
}
Tensor normal_mps(const Tensor& mean, double std, c10::optional<Generator> gen) {
Tensor output = empty_mps(
mean.sizes(),
mean.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
Tensor std_t = empty_mps(
output.sizes(),
output.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
std_t.fill_(std);
return normal_mps_out(mean, std_t, gen, output);
}
Tensor normal_mps(double mean, const Tensor& std, c10::optional<Generator> gen) {
Tensor output = empty_mps(
std.sizes(),
std.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
Tensor mean_t = empty_mps(
output.sizes(),
output.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
mean_t.fill_(mean);
return normal_mps_out(mean_t, std, gen, output);
}
Tensor normal_mps(const Tensor& mean, const Tensor& std, c10::optional<Generator> gen) {
auto shape = at::infer_size(mean.sizes(), std.sizes());
Tensor output = empty_mps(
shape,
mean.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
return normal_mps_out(mean, std, gen, output);
}
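These overloads back the tensor/scalar combinations of torch.normal: the scalar argument is materialized into a filled tensor so a single normal_mps_out kernel serves every case, and the tensor/tensor form broadcasts via infer_size. From Python, assuming an MPS-enabled build:

import torch

mean = torch.zeros(5, device="mps")
std = torch.ones(5, device="mps")
a = torch.normal(mean, 1.0)  # Tensor mean, double std
b = torch.normal(0.0, std)   # double mean, Tensor std
c = torch.normal(mean, std)  # Tensor/Tensor, shapes broadcast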
Tensor& normal_mps_out(const Tensor& mean, double std, c10::optional<Generator> gen, Tensor& output) {
TORCH_CHECK(std >= 0.0, "normal_mps_out expects std >= 0.0, but found std=", std);
@@ -185,7 +241,7 @@ Tensor& normal_mps_out(const Tensor& mean, const Tensor& std, c10::optional<Gene
@autoreleasepool {
MPSShape* input_shape = getMPSShape(output);
string key = "normal_mps_out:" + getMPSShapeString(input_shape) + ":" + getMPSTypeString(output.scalar_type());
string key = "normal_mps_out:" + getMPSShapeString(input_shape) + ":" + getMPSTypeString(output.scalar_type()) + ":" + to_string(seed_);
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
@@ -210,6 +266,7 @@ Tensor& normal_mps_out(const Tensor& mean, const Tensor& std, c10::optional<Gene
// MPSGenerator
MPSGraphTensor* randomTensor = [mpsGraph randomTensorWithShape:input_shape
descriptor:desc
seed:seed_
name:nil];
MPSGraphTensor* scaleTensor = [mpsGraph multiplicationWithPrimaryTensor:randomTensor
secondaryTensor:stdTensor
@@ -455,5 +512,69 @@ Tensor& random_mps_
return random_mps_(self, 0, to, gen);
}
// Exponential distribution
Tensor& exponential_mps_(Tensor& self, double lambda, c10::optional<Generator> gen) {
using namespace mps;
if (self.numel() == 0) {
return self;
}
TORCH_CHECK(lambda > 0, "exponential_mps_: lambda must be greater than zero");
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
uint64_t seed_ = c10::detail::getNonDeterministicRandom(true);
@autoreleasepool {
MPSShape* self_shape = getMPSShape(self);
MPSGraph* mpsGraph = make_mps_graph();
// TODO: right now taking the default seed. Extend it to be extracted from the
// MPSGenerator
MPSGraphTensor* randomTensor = [mpsGraph randomUniformTensorWithShape:self_shape
seed:seed_
name:nil];
MPSGraphTensor* unitTensor = [mpsGraph constantWithScalar:1.0f
dataType:MPSDataTypeFloat32];
MPSGraphTensor* minusLambdaTensor = [mpsGraph constantWithScalar:-lambda
dataType:MPSDataTypeFloat32];
MPSGraphTensor* subtractTensor = [mpsGraph subtractionWithPrimaryTensor:unitTensor
secondaryTensor:randomTensor
name:nil];
MPSGraphTensor* logTensor = [mpsGraph logarithmWithTensor:subtractTensor
name:nil];
MPSGraphTensor* outputTensor = [mpsGraph divisionWithPrimaryTensor:logTensor
secondaryTensor:minusLambdaTensor
name:nil];
if(getMPSDataType(self.scalar_type()) != MPSDataTypeFloat32)
outputTensor = [mpsGraph castTensor:outputTensor
toType:getMPSDataType(self.scalar_type())
name:@"output"];
auto outputPlaceholder = Placeholder(outputTensor, self);
NSDictionary<MPSGraphTensor *, MPSGraphTensorData *> *feeds = nil;
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(stream, mpsGraph, feeds, results);
}
return self;
}
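The graph implements inverse-transform sampling: with U ~ Uniform(0, 1), X = -log(1 - U) / lambda follows Exponential(lambda), because the CDF F(x) = 1 - exp(-lambda * x) inverts to x = -log(1 - u) / lambda. The same computation on CPU tensors, for reference:

import torch

lam = 2.0
u = torch.rand(100_000)
x = -torch.log(1 - u) / lam  # the formula the graph nodes above encode
print(x.mean())              # close to 1 / lam = 0.5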
} // namespace native
} // namespace at


@@ -0,0 +1,37 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/NamedTensorUtils.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/NativeFunctions.h>
#include <ATen/MPSFunctions.h>
#else
#include <ATen/ops/eq_mps_dispatch.h>
#include <ATen/ops/equal_native.h>
#endif
namespace at {
namespace mps {
TORCH_API at::Tensor eq(const at::Tensor & self, const at::Tensor & other);
} // namespace mps
namespace native {
bool mps_equal(const Tensor& self, const Tensor &src) {
if (!at::namedinference::are_names_equal(
self.unsafeGetTensorImpl(), src.unsafeGetTensorImpl())) {
return false;
}
at::NoNamesGuard guard;
TORCH_CHECK(self.device() == src.device(), "Cannot compare two tensors on "
"different devices. Got: ", self.device(), " and ", src.device());
if (self.sizes() != src.sizes()) {
return false;
}
if (self.numel() == 0) {
return true;
}
return at::mps::eq(self, src).all().item().to<bool>();
}
} // namespace native
} // namespace at
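In effect, torch.equal on MPS short-circuits on mismatched names, devices, shapes, and empty inputs, and otherwise reduces an element-wise eq() to a single boolean:

import torch

a = torch.arange(4., device="mps")
b = torch.arange(4., device="mps")
print(torch.equal(a, b))      # True
print(torch.equal(a, b + 1))  # False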


@@ -0,0 +1,120 @@
#include <ATen/ATen.h>
#include <ATen/Tensor.h>
#include <ATen/Utils.h>
#include <ATen/mps/MPSStream.h>
#include <ATen/native/mps/OperationUtils.h>
#include <torch/library.h>
#include <c10/util/Optional.h>
// Steps to add op for MPS backend:
// 1. Register the op in aten/src/ATen/native/native_functions.yaml with the "MPS" dispatch key
// 2. Define the function interface for the MPS backend similar to other
// backends, depending on whether it's structured or non-structured
// 3. Add boiler-plate error checking code as expected for the Op
// 4. The code structure roughly follows the pattern
// a) get the MPS stream handle to encode work onto
// b) get an instance of MPSGraphCache and create a key unique to the Graph
// needed for implementing this Op. Any shape, dataType or parameter
// passed to the MPSGraph during its construction will need to be included
// here.
// c) Create the graph using make_mps_graph() and add operations to the
// instance of MPSGraph. This is if the Cache->lookup() fails.
// d) Store the MPSGraphTensors for inputs and output which are needed at
// runtime.
// e) Use the CachedGraph instance's inputs and output to create Placeholders
// You will need to pass in Tensor to create MPSGraphTensorData objects.
// f) Using MPSGraphTensor and MPSGraphTensorData instances create a feeds
// dictionary.
// g) Then call runMPSGraph() with input params and return the result.
//
namespace at {
namespace native {
Tensor& eye_out_mps(int64_t n, Tensor& result) {
// the default value of `m` equals `n`
return eye_out_mps(n, n, result);
}
Tensor& eye_out_mps(int64_t n, int64_t m, Tensor& result) {
// This is one example of boiler-plate error checking, taking after CPU/CUDA counterparts
TORCH_CHECK(n >= 0, "n must be greater or equal to 0, got ", n);
TORCH_CHECK(m >= 0, "m must be greater or equal to 0, got ", m);
result.resize_({n, m});
result.zero_();
// Handle empty outputs
if(result.numel() == 0)
return result;
// Get MPS stream
using namespace mps;
MPSStream* stream = getCurrentMPSStream();
// Derive from MPSCachedGraph
// This structure is used to cache an MPSGraph with certain keys, so that we don't have to compile the same MPSGraph time and time again for the same operation
// The keys of this structure are based on the inputs and outputs needed for the operation
// Here, we don't have any input tensors, just an output tensor
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
// A key is used to identify the MPSGraph which was created once, and can be reused if the parameters, data types etc match the earlier created MPSGraph
string key = "eye_out_mps:" + getTensorsStringKey({result});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
// Initialize graph
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* onesTensor = [mpsGraph constantWithScalar:1.0f
shape:getMPSShape(result)
dataType:getMPSDataType(result.scalar_type())];
// Here we can call the MPSGraph API needed to execute the operation.
// The API details can be found here: https://developer.apple.com/documentation/metalperformanceshadersgraph/mpsgraph
MPSGraphTensor* outputTensor = [mpsGraph bandPartWithTensor:onesTensor
numLower:0
numUpper:0
name:nil];
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
// Create placeholders which use the keys of the CachedGraph to create inputs and outputs of the operation
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, result);
// Create dictionary of inputs/feeds and outputs/results
// In this case, there are no inputs, so the feeds are nil
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = nil;
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
// Run the graph
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
return result;
}
} // namespace native
} // namespace at
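The bandPart call keeps only the band between numLower and numUpper diagonals; with both set to 0, just the main diagonal of the all-ones tensor survives, which is exactly the identity pattern. From Python, assuming an MPS-enabled build:

import torch

print(torch.eye(3, 4, device="mps").cpu())
# tensor([[1., 0., 0., 0.],
#         [0., 1., 0., 0.],
#         [0., 0., 1., 0.]])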


@@ -9,6 +9,7 @@
#include <ATen/ExpandUtils.h>
#include <ATen/MemoryOverlap.h>
#include <ATen/mps/MPSStream.h>
#include <ATen/WrapDimUtilsMulti.h>
#include <ATen/native/LinearAlgebraUtils.h>
#include <ATen/native/mps/OperationUtils.h>
#include <ATen/native/Resize.h>
@@ -18,6 +19,7 @@
#include <ATen/native/IndexingUtils.h>
#include <c10/util/irange.h>
#include <c10/core/QScheme.h>
#include <c10/util/SmallVector.h>
#ifdef __OBJC__
#include <MetalPerformanceShaders/MetalPerformanceShaders.h>
@@ -26,28 +28,114 @@
namespace at {
namespace native {
Tensor flip_mps(const Tensor& self, IntArrayRef dims) {
using namespace mps;
Tensor result = at::native::empty_mps(
self.sizes(),
self.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
auto total_dims = self.dim();
// It wraps the dims and checks that there are no repeated dims
auto flip_dims_b = at::dim_list_to_bitset(dims, total_dims);
NSMutableArray<NSNumber*> * ns_dims = [NSMutableArray<NSNumber*> new];
for (const auto i : c10::irange(total_dims)) {
if(flip_dims_b[i] && self.size(i) > 1 && self.stride(i) != 0) {
[ns_dims addObject:[NSNumber numberWithInt:i]];
}
}
// Nothing to do, we return fast
if (dims.size() == 0 || self.numel() <= 1) {
result.copy_(self);
return result;
}
MPSStream* stream = getCurrentMPSStream();
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* inputTensor_ = nil;
MPSGraphTensor* outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
NSString* ns_dims_key = [[ns_dims valueForKey:@"description"] componentsJoinedByString:@","];
// A key is used to identify the MPSGraph which was created once, and can be reused if the parameters, data types etc match the earlier created MPSGraph
string key = "flip_mps:" + getTensorsStringKey({self}) + ":" + string([ns_dims_key UTF8String]);
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
MPSGraphTensor* outputTensor = [mpsGraph reverseTensor:inputTensor
axes:ns_dims
name:nil];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
// Create placeholders which use the keys of the CachedGraph to create inputs and outputs of the operation
Placeholder inputPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, result);
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
inputPlaceholder.getMPSGraphTensor() : inputPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
// Run the graph
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
return result;
}
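flip_mps lowers to a single reverseTensor node, after pruning axes whose size is 1 or stride is 0, since reversing those is a no-op. For example, assuming an MPS-enabled build:

import torch

x = torch.arange(6., device="mps").reshape(2, 3)
print(torch.flip(x, dims=[0, 1]).cpu())
# tensor([[5., 4., 3.],
#         [2., 1., 0.]])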
Tensor index_select_mps(const Tensor & self,
int64_t dim,
const Tensor & index) {
IntArrayRef input_shape = self.sizes();
auto num_input_dims = input_shape.size();
IntArrayRef index_shape = index.sizes();
auto num_indices = index.numel();
TORCH_CHECK_INDEX(index.dim() <= 1, "index_select(): Index is supposed to be a vector");
dim = maybe_wrap_dim(dim, self.dim());
int64_t* shape_data = (int64_t*)malloc(num_input_dims * sizeof(int64_t));
std::vector<int64_t> shape_data(num_input_dims);
// Calculate new shape
for(int i = 0; i < num_input_dims; i++) {
if(i == dim)
for(auto i : c10::irange(num_input_dims)) {
if (i == dim) {
shape_data[i] = num_indices;
else
} else {
shape_data[i] = input_shape[i];
}
}
IntArrayRef output_shape = IntArrayRef(shape_data, num_input_dims);
IntArrayRef output_shape = IntArrayRef(shape_data.data(), num_input_dims);
Tensor result = at::native::empty_mps(
output_shape,
@@ -57,8 +145,6 @@ Tensor index_select_mps(const Tensor & self,
c10::nullopt,
c10::nullopt);
free(shape_data);
index_select_out_mps(self, dim, index, result);
return result;
}
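The shape computed above is the input shape with size(dim) swapped for index.numel(), e.g.:

import torch

x = torch.arange(12., device="mps").reshape(3, 4)
idx = torch.tensor([2, 0], device="mps")
print(torch.index_select(x, 0, idx).shape)  # torch.Size([2, 4])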
@@ -245,13 +331,10 @@ Tensor embedding_dense_backward_mps(
IntArrayRef indices_shape = indices.sizes();
int64_t num_indices_dims = indices_shape.size();
int64_t* outgoing_gradient_shape = (int64_t *) malloc(sizeof(int64_t) * 2);
int64_t D = incoming_gradient_shape[num_incoming_gradient_dims - 1];
outgoing_gradient_shape[0] = num_weights;
outgoing_gradient_shape[1] = D;
int64_t num_outgoing_gradient_dims = 2;
c10::SmallVector<int64_t, 2> outgoing_gradient_shape{num_weights, D};
Tensor outgoing_gradient = at::native::empty_mps(
IntArrayRef(outgoing_gradient_shape, num_outgoing_gradient_dims),
IntArrayRef(outgoing_gradient_shape.data(), outgoing_gradient_shape.size()),
grad_.scalar_type(),
c10::nullopt,
kMPS,
@@ -288,7 +371,7 @@ Tensor embedding_dense_backward_mps(
MPSGraphTensor *outgoingGradTensor;
outgoingGradTensor = [mpsGraph scatterNDWithUpdatesTensor:incomingGradTensor
indicesTensor:reshapedIndicesTensor
shape:native_mps::getMPSShape(IntArrayRef(outgoing_gradient_shape, num_outgoing_gradient_dims))
shape:native_mps::getMPSShape(IntArrayRef(outgoing_gradient_shape.data(), outgoing_gradient_shape.size()))
batchDimensions:0
mode:MPSGraphScatterModeAdd
name:@"edb"];
@@ -316,7 +399,6 @@ Tensor embedding_dense_backward_mps(
};
native_mps::runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
free(outgoing_gradient_shape);
return outgoing_gradient;
}


@@ -242,9 +242,9 @@ std::tuple<Tensor, Tensor> _mps_linear_backward_weights(
MPSGraphTensor *biasTensor_ = nil;
};
auto grad_output_reshaped = grad_output.dim() > 2 ?
auto grad_output_reshaped = grad_output.dim() != 2 ?
grad_output.reshape({-1, grad_output.size(grad_output.dim() - 1)}) : grad_output;
auto input_reshaped = input.dim() > 2 ? input.reshape({-1, input.size(input.dim() - 1)}) : input;
auto input_reshaped = input.dim() != 2 ? input.reshape({-1, input.size(input.dim() - 1)}) : input;
TORCH_CHECK(grad_output_reshaped.is_mps());
TORCH_CHECK(input_reshaped.is_mps());


@@ -338,12 +338,9 @@ Tensor& addmm_out_mps_impl(
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
Placeholder selfPlaceholder = Placeholder(cachedGraph->selfTensor_, self,
nullptr, true);
Placeholder otherPlaceholder = Placeholder(cachedGraph->otherTensor_, other,
nullptr, true);
Placeholder biasPlaceholder = Placeholder(cachedGraph->biasTensor_, bias,
nullptr, false);
Placeholder selfPlaceholder = Placeholder(cachedGraph->selfTensor_, self);
Placeholder otherPlaceholder = Placeholder(cachedGraph->otherTensor_, other);
Placeholder biasPlaceholder = Placeholder(cachedGraph->biasTensor_, bias);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, output);
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{


@@ -277,7 +277,7 @@ Tensor& bce_loss_out_impl(const Tensor& input, const Tensor& target,
newCachedGraph->gradInputTensor = bceLoss;
}
} else {
newCachedGraph->lossTensor = reduceTensor(bceLoss, reduction, mpsGraph, input.sizes().size());
newCachedGraph->lossTensor = reduceTensor(bceLoss, reduction, mpsGraph, input_squeezed.sizes().size());
}
}
return newCachedGraph;
@@ -288,6 +288,7 @@ Tensor& bce_loss_out_impl(const Tensor& input, const Tensor& target,
Placeholder lossPlaceholder = Placeholder(cachedGraph->lossTensor, loss_squeezed);
NSMutableDictionary *feeds = [[NSMutableDictionary new] autorelease];
feeds[inputPlaceholder.getMPSGraphTensor()] = inputPlaceholder.getMPSGraphTensorData();
feeds[targetPlaceholder.getMPSGraphTensor()] = targetPlaceholder.getMPSGraphTensorData();
if (weight.defined()) {
@@ -1003,7 +1004,7 @@ void smooth_l1_loss_backward_impl(
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
inputPlaceholder.getMPSGraphTensor() : inputPlaceholder.getMPSGraphTensorData(),
targetPlaceholder.getMPSGraphTensor() : targetPlaceholder .getMPSGraphTensorData()
targetPlaceholder.getMPSGraphTensor() : targetPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
gradInputPlaceholder.getMPSGraphTensor() : gradInputPlaceholder.getMPSGraphTensorData()
@@ -1034,6 +1035,244 @@ void smooth_l1_loss_backward_template(
// APIs exposed to at::native scope
// HuberLoss
Tensor& huber_loss_out_mps(const Tensor& input, const Tensor& target, int64_t reduction, double delta, Tensor& output){
string op_name = __func__;
using namespace mps;
TORCH_CHECK(delta > 0, "huber_loss does not support non-positive values for delta.");
TORCH_CHECK(target.is_same_size(input), op_name + ": target and input tensors must have identical shapes");
TORCH_CHECK(output.is_mps());
if(reduction == Reduction::None)
output.resize_(target.sizes());
if(reduction == Reduction::Sum)
output.resize_({});
if(reduction == Reduction::Mean)
output.resize_({});
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* inputTensor_ = nil;
MPSGraphTensor* targetTensor_ = nil;
MPSGraphTensor* outputTensor_ = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
string key = op_name + ":" + reductionToString(reduction) + ":" + std::to_string(delta) + ":" + getTensorsStringKey({input, target});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, input);
MPSGraphTensor* targetTensor = mpsGraphRankedPlaceHolder(mpsGraph, target);
MPSGraphTensor* deltaTensor = [mpsGraph constantWithScalar:delta
shape:@[@1]
dataType:MPSDataTypeFloat32];
MPSGraphTensor* halfTensor = [mpsGraph constantWithScalar:.5f
shape:@[@1]
dataType:MPSDataTypeFloat32];
MPSGraphTensor* diffTensor = [mpsGraph subtractionWithPrimaryTensor: inputTensor
secondaryTensor: targetTensor
name: nil];
MPSGraphTensor* absDiffTensor = [mpsGraph absoluteWithTensor: diffTensor
name: nil];
MPSGraphTensor* firstCondTensor = [mpsGraph multiplicationWithPrimaryTensor: absDiffTensor
secondaryTensor: absDiffTensor
name: nil];
firstCondTensor = [mpsGraph multiplicationWithPrimaryTensor: firstCondTensor
secondaryTensor: halfTensor
name: nil];
MPSGraphTensor* secondCondTensor = [mpsGraph multiplicationWithPrimaryTensor: deltaTensor
secondaryTensor: halfTensor
name: nil];
secondCondTensor = [mpsGraph subtractionWithPrimaryTensor: absDiffTensor
secondaryTensor: secondCondTensor
name: nil];
secondCondTensor = [mpsGraph multiplicationWithPrimaryTensor: deltaTensor
secondaryTensor: secondCondTensor
name: nil];
MPSGraphTensor* outputTensor = [mpsGraph selectWithPredicateTensor: [mpsGraph lessThanOrEqualToWithPrimaryTensor:absDiffTensor
secondaryTensor:deltaTensor
name:nil]
truePredicateTensor: firstCondTensor
falsePredicateTensor: secondCondTensor
name:nil];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->targetTensor_ = targetTensor;
newCachedGraph->outputTensor_ = reduceTensor(outputTensor, reduction, mpsGraph, input.sizes().size());
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
Placeholder inputPlaceholder = Placeholder(cachedGraph->inputTensor_, input);
Placeholder targetPlaceholder = Placeholder(cachedGraph->targetTensor_, target);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, output);
// Create dictionary of inputs and outputs
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
inputPlaceholder.getMPSGraphTensor() : inputPlaceholder.getMPSGraphTensorData(),
targetPlaceholder.getMPSGraphTensor() : targetPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(getCurrentMPSStream(), cachedGraph->graph(), feeds, results);
}
return output;
}
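The select node implements the piecewise Huber definition with d = input - target: 0.5 * d^2 where |d| <= delta, and delta * (|d| - 0.5 * delta) otherwise. A CPU cross-check of the same formula:

import torch

d = torch.tensor([-2.0, -0.5, 0.5, 2.0])
delta = 1.0
ref = torch.where(d.abs() <= delta, 0.5 * d * d, delta * (d.abs() - 0.5 * delta))
print(ref)
print(torch.nn.functional.huber_loss(d, torch.zeros_like(d),
                                     reduction="none", delta=delta))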
Tensor huber_loss_mps(const Tensor& input, const Tensor& target, int64_t reduction, double delta) {
TORCH_CHECK(delta > 0, "huber_loss does not support non-positive values for delta.");
Tensor output = at::native::empty_mps(
input.sizes(),
input.scalar_type(),
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
return huber_loss_out_mps(input, target, reduction, delta, output);
}
Tensor& huber_loss_backward_out_mps(
const Tensor& grad_output,
const Tensor& input,
const Tensor& target,
int64_t reduction,
double delta,
Tensor& grad_input)
{
using namespace mps;
auto is_mean_reduction = reduction == Reduction::Mean;
auto input_numel = input.numel();
auto new_grad_output = grad_output.contiguous();
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *gradOutputTensor_ = nil;
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *targetTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
MPSGraphCache *cache_ = MPSGraphCache::getInstance();
MPSStream *stream = getCurrentMPSStream();
@autoreleasepool {
MPSShape* input_shape = getMPSShape(input);
NSString* ns_shape_key = [[input_shape valueForKey:@"description"] componentsJoinedByString:@","];
string key = "huber_loss_backward_out_mps:" + reductionToString(reduction) + ":" +
std::to_string(delta) + ":" +
[ns_shape_key UTF8String] + ":" +
getMPSTypeString(input.scalar_type()) + ":" +
getMPSTypeString(target.scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
cachedGraph = static_cast<CachedGraph*>(cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
// Initialize graph
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* gradOutputTensor = mpsGraphRankedPlaceHolder(mpsGraph, getMPSDataType(new_grad_output.scalar_type()), getMPSShape(new_grad_output));
MPSGraphTensor* inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, getMPSDataType(input.scalar_type()), input_shape);
MPSGraphTensor* targetTensor = mpsGraphRankedPlaceHolder(mpsGraph, getMPSDataType(target.scalar_type()), getMPSShape(target));
MPSGraphTensor* isMeanReductionTensor = [mpsGraph constantWithScalar:is_mean_reduction
dataType:MPSDataTypeInt64]; // constant does not support MPSDataTypeBool
MPSGraphTensor* inputNumelTensor = [mpsGraph constantWithScalar:input_numel
dataType:getMPSDataType(new_grad_output.scalar_type())];
MPSGraphTensor* normGradOutputTensor = [mpsGraph selectWithPredicateTensor:isMeanReductionTensor
truePredicateTensor: [mpsGraph divisionWithPrimaryTensor:gradOutputTensor
secondaryTensor:inputNumelTensor
name:nil]
falsePredicateTensor: gradOutputTensor
name:nil];
MPSGraphTensor* deltaTensor = [mpsGraph constantWithScalar:delta
shape:getMPSShape(target)
dataType:MPSDataTypeFloat32];
MPSGraphTensor* diffTensor = [mpsGraph subtractionWithPrimaryTensor:inputTensor
secondaryTensor:targetTensor
name:nil];
MPSGraphTensor* normGradOutputDeltaTensor = [mpsGraph multiplicationWithPrimaryTensor:normGradOutputTensor
secondaryTensor:deltaTensor
name:nil];
// first condition: (input - target) <= -delta
// formula: -norm * grad_output * delta
MPSGraphTensor* firstCondTensor = [mpsGraph negativeWithTensor: normGradOutputDeltaTensor
name: nil];
// second condition: (input - target) >= delta
// formula: norm * grad_output * delta
MPSGraphTensor* secondCondTensor = normGradOutputDeltaTensor;
// third condition: (input - target) within -delta to delta
// formula: norm * (input - target) * grad_output
MPSGraphTensor* thirdCondTensor = [mpsGraph multiplicationWithPrimaryTensor:normGradOutputTensor
secondaryTensor:diffTensor
name:nil];
MPSGraphTensor* secondThirdTensor = [mpsGraph selectWithPredicateTensor: [mpsGraph greaterThanOrEqualToWithPrimaryTensor:diffTensor
secondaryTensor:deltaTensor
name:nil]
truePredicateTensor: secondCondTensor
falsePredicateTensor: thirdCondTensor
name:nil];
MPSGraphTensor* outputTensor = [mpsGraph selectWithPredicateTensor: [mpsGraph lessThanOrEqualToWithPrimaryTensor: diffTensor
secondaryTensor:[mpsGraph negativeWithTensor: deltaTensor
name: nil]
name:nil]
truePredicateTensor: firstCondTensor
falsePredicateTensor: secondThirdTensor
name:nil];
newCachedGraph->gradOutputTensor_ = gradOutputTensor;
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->targetTensor_ = targetTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
}));
}
Placeholder gradOutputPlaceholder = Placeholder(cachedGraph->gradOutputTensor_, new_grad_output);
Placeholder inputPlaceholder = Placeholder(cachedGraph->inputTensor_, input);
Placeholder targetPlaceholder = Placeholder(cachedGraph->targetTensor_, target);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, grad_input);
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
gradOutputPlaceholder.getMPSGraphTensor() : gradOutputPlaceholder.getMPSGraphTensorData(),
inputPlaceholder.getMPSGraphTensor() : inputPlaceholder.getMPSGraphTensorData(),
targetPlaceholder.getMPSGraphTensor() : targetPlaceholder.getMPSGraphTensorData()
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
return grad_input;
}
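The three predicate branches match the piecewise derivative: -g * delta for d <= -delta, g * delta for d >= delta, and g * d in between (d = input - target), where g is grad_output pre-divided by numel() under 'mean' reduction. Cross-checked against autograd on CPU:

import torch

inp = torch.tensor([-2.0, 0.3, 2.0], requires_grad=True)
tgt = torch.zeros(3)
loss = torch.nn.functional.huber_loss(inp, tgt, reduction="mean", delta=1.0)
loss.backward()
print(inp.grad)  # tensor([-0.3333, 0.1000, 0.3333])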
// MSELoss
TORCH_IMPL_FUNC(mse_loss_out_mps) (
const Tensor& input, const Tensor& target, int64_t reduction, const Tensor& output) {


@@ -7,6 +7,7 @@
#include <ATen/mps/MPSStream.h>
#include <ATen/native/mps/OperationUtils.h>
#include <ATen/native/Pool.h>
#include <ATen/native/layer_norm.h>
#include <torch/library.h>
namespace at {
@@ -69,7 +70,6 @@ std::tuple<Tensor&, Tensor&, Tensor&> batch_norm_mps_out
Tensor& save_var) {
namespace native_mps = at::native::mps;
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
@@ -800,5 +800,426 @@ std::tuple<Tensor, Tensor, Tensor> batch_norm_backward_mps
}
// Layer norm forward for MPS
std::tuple<Tensor, Tensor, Tensor> layer_norm_mps(
const Tensor& input,
IntArrayRef normalized_shape,
const c10::optional<Tensor>& weight_opt,
const c10::optional<Tensor>& bias_opt,
double eps) {
c10::MaybeOwned<Tensor> weight_maybe_owned = at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
c10::MaybeOwned<Tensor> bias_maybe_owned = at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;
auto M_N = _check_layer_norm_inputs(input, normalized_shape, weight, bias);
auto M = M_N.first;
auto X = input.expect_contiguous();
auto gamma = weight.expect_contiguous();
auto input_shape = input.sizes();
const auto input_ndim = input.dim();
const int normalized_ndim = normalized_shape.size();
// NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
const int axis = input_ndim - normalized_ndim;
at::Tensor input_reshaped = input.view({1, M, -1});
// Unlike Batch Normalization, which applies scalar scale and bias for each
// entire channel/plane with the affine option, Layer Normalization applies
// per-element scale and bias. E.g. For input {N, C, H, W}, weight for
// batchnorm has shape {C} while weight for layernorm has shape {H, W} or {W}.
auto outputs = at::native_batch_norm(
input_reshaped, /*weight=*/{}, /*bias=*/{}, /*running_mean=*/{},
/*running_var=*/{}, /*training=*/true, /*momentum=*/0, eps);
at::Tensor out = std::get<0>(outputs);
out = out.view(input_shape);
if (weight.defined() && bias.defined()) {
out = bias.addcmul(out, weight, 1);
} else if (weight.defined()) {
out = out.mul(weight);
} else if (bias.defined()) {
out = out.add(bias);
}
at::Tensor mean = std::get<1>(outputs);
at::Tensor variance = std::get<2>(outputs);
at::Tensor rstd = at::rsqrt(at::add(variance, eps));
std::vector<int64_t> stat_shape;
for (const auto idx : c10::irange(axis)) {
stat_shape.push_back(input_shape[idx]);
}
for (const auto idx : c10::irange(axis, input.dim())) {
(void)idx; // Suppress unused variable
stat_shape.push_back(1);
}
mean = mean.view(stat_shape);
rstd = rstd.view(stat_shape);
return std::make_tuple(out, mean, rstd);
}
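The [1, M, -1] reshape makes each of the M rows one batch-norm normalization group, which is exactly layer norm's per-group statistics; the per-element weight and bias are applied afterwards, outside the batch-norm call. A CPU sketch of the equivalence:

import torch

x = torch.randn(2, 3, 4)
M = x.numel() // 4  # groups for normalized_shape = (4,)
g = x.reshape(M, -1)
mu = g.mean(dim=1, keepdim=True)
var = g.var(dim=1, unbiased=False, keepdim=True)
manual = ((g - mu) / (var + 1e-5).sqrt()).reshape(x.shape)
ref = torch.nn.functional.layer_norm(x, (4,))
print(torch.allclose(manual, ref, atol=1e-6))  # True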
std::tuple<Tensor, Tensor, Tensor> layer_norm_backward_mps(
const Tensor& grad_out,
const Tensor& input,
IntArrayRef normalized_shape,
const Tensor& mean,
const Tensor& rstd,
const c10::optional<Tensor>& weight_opt /* optional */,
const c10::optional<Tensor>& bias_opt /* optional */,
std::array<bool, 3> grad_input_mask) {
c10::MaybeOwned<Tensor> weight_maybe_owned =
at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
c10::MaybeOwned<Tensor> bias_maybe_owned =
at::borrow_from_optional_tensor(bias_opt);
const Tensor& bias = *bias_maybe_owned;
auto M_N = _check_layer_norm_inputs(input, normalized_shape, weight, bias);
auto M = M_N.first;
auto N = M_N.second;
auto X = input.expect_contiguous();
auto gamma = weight.expect_contiguous();
auto beta = bias.expect_contiguous();
auto dOut = grad_out.expect_contiguous();
Tensor grad_input;
Tensor grad_weight;
Tensor grad_bias;
if (grad_input_mask[0]) {
grad_input = at::native::empty_like(
*X,
c10::nullopt /* dtype */,
c10::nullopt /* layout */,
kMPS /* device */,
c10::nullopt /* pin_memory */,
at::MemoryFormat::Contiguous);
}
if (grad_input_mask[1]) {
grad_weight = M > 0 ? at::native::empty_like(
*gamma,
c10::nullopt /* dtype */,
c10::nullopt /* layout */,
kMPS /* device */,
c10::nullopt /* pin_memory */,
at::MemoryFormat::Contiguous)
: at::native::zeros_like(
*gamma,
c10::nullopt /* dtype */,
c10::nullopt /* layout */,
kMPS /* device */,
c10::nullopt /* pin_memory */,
at::MemoryFormat::Contiguous);
}
if (grad_input_mask[2]) {
grad_bias = M > 0 ? at::native::empty_like(
*beta,
c10::nullopt /* dtype */,
c10::nullopt /* layout */,
kMPS /* device */,
c10::nullopt /* pin_memory */,
at::MemoryFormat::Contiguous)
: at::native::zeros_like(
*beta,
c10::nullopt /* dtype */,
c10::nullopt /* layout */,
kMPS /* device */,
c10::nullopt /* pin_memory */,
at::MemoryFormat::Contiguous);
}
if (M > 0) {
namespace native_mps = at::native::mps;
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* gradOutputTensor_ = nil;
MPSGraphTensor* inputTensor_ = nil;
MPSGraphTensor* weightTensor_ = nil;
MPSGraphTensor* meanTensor_ = nil;
MPSGraphTensor* rstdTensor_ = nil;
MPSGraphTensor* gradInputTensor_ = nil;
MPSGraphTensor* gradWeightTensor_ = nil;
MPSGraphTensor* gradBiasTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
auto stream = at::mps::getCurrentMPSStream();
const bool has_weight = (weight_opt.has_value() && weight_opt->defined());
if (grad_input.numel() == 0) {
return std::make_tuple(grad_input, grad_weight, grad_bias);
}
// const auto memory_format = input.suggest_memory_format();
@autoreleasepool {
MPSShape* input_shape = mps::getMPSShape(*X);
MPSShape* gamma_shape = mps::getMPSShape(normalized_shape);
auto num_normalized_dims = [gamma_shape count];
auto num_channel_dims = [input_shape count] - num_normalized_dims;
NSMutableArray<NSNumber*>* gamma_axes = [NSMutableArray<NSNumber*> arrayWithCapacity:num_channel_dims];
for(int i = 0; i < num_channel_dims; i++)
gamma_axes[i] = [NSNumber numberWithInt:i];
// Axes along which to reduce to get "batch norm" gradient
// This will be applied on shape [1, M, -1]
NSMutableArray<NSNumber*>* bn_axes = [NSMutableArray<NSNumber*> arrayWithCapacity:num_normalized_dims];
for(int i = 0; i < num_normalized_dims; i++)
bn_axes[i] = [NSNumber numberWithInt:(1+1+i)];
// Shape of input to do "batch norm" backward
// This is [1, M, -1]
NSMutableArray<NSNumber*>* bn_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims+2)];
bn_shape[0] = [NSNumber numberWithInt:1];
bn_shape[1] = [NSNumber numberWithInt:M];
for(int i = 0; i < num_normalized_dims; i++)
bn_shape[i+2] = input_shape[i+num_channel_dims];
// Shape of mean to do "batch norm" backward
// This is [1, M, [1,1,1..1]]
NSMutableArray<NSNumber*>* bn_mean_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims+2)];
bn_mean_shape[0] = [NSNumber numberWithInt:1];
bn_mean_shape[1] = [NSNumber numberWithInt:M];
for(int i = 0; i < num_normalized_dims; i++)
bn_mean_shape[i+2] = [NSNumber numberWithInt:1];
// Shape of gamma to multiply with "batch norm" backward
// This is [1, 1, -1]
NSMutableArray<NSNumber*>* bn_gamma_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:(num_normalized_dims+2)];
bn_gamma_shape[0] = [NSNumber numberWithInt:1];
bn_gamma_shape[1] = [NSNumber numberWithInt:1];
for(int i = 0; i < num_normalized_dims; i++)
bn_gamma_shape[i+2] = input_shape[i+num_channel_dims];
string key = "layer_norm_backward_mps:"
+ std::to_string(has_weight) + ":"
+ native_mps::getArrayRefString(normalized_shape) + ":"
+ native_mps::getArrayRefString((*X).sizes()) + ":"
+ native_mps::getMPSTypeString((*X).scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = native_mps::make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = native_mps::mpsGraphRankedPlaceHolder(mpsGraph, *X);
MPSGraphTensor* gradOutputTensor = native_mps::mpsGraphRankedPlaceHolder(mpsGraph, *dOut);
MPSGraphTensor* weightTensor = nil;
if(has_weight)
weightTensor = native_mps::mpsGraphRankedPlaceHolder(mpsGraph, *gamma);
// Mean and inv std tensors to be saved and returned
MPSGraphTensor* meanTensor = native_mps::mpsGraphRankedPlaceHolder(mpsGraph, mean);
MPSGraphTensor* rstdTensor = native_mps::mpsGraphRankedPlaceHolder(mpsGraph, rstd);
MPSGraphTensor* gradInputTensor = nil;
MPSGraphTensor* gradWeightTensor = nil;
MPSGraphTensor* gradBiasTensor = nil;
if(grad_input_mask[1]) {
MPSGraphTensor* xMinusMean = [mpsGraph subtractionWithPrimaryTensor:inputTensor
secondaryTensor:meanTensor
name:nil];
MPSGraphTensor* bnForwardTensor = [mpsGraph multiplicationWithPrimaryTensor:xMinusMean
secondaryTensor:rstdTensor
name:nil];
MPSGraphTensor* gradBnMulTensor = [mpsGraph multiplicationWithPrimaryTensor:bnForwardTensor
secondaryTensor:gradOutputTensor
name:nil];
gradWeightTensor = [mpsGraph reductionSumWithTensor:gradBnMulTensor
axes:gamma_axes
name:nil];
}
if(grad_input_mask[2]) {
gradBiasTensor = [mpsGraph reductionSumWithTensor:gradOutputTensor
axes:gamma_axes
name:nil];
}
if(grad_input_mask[0]) {
// Reshape input to [1, M, -1]
// Reshape mean and rstd to [1, M, -1]
// Reshape gamma to [1, 1, -1] (-1 has N dims)
MPSGraphTensor* bnInputTensor = [mpsGraph reshapeTensor:inputTensor
withShape:bn_shape
name:nil];
MPSGraphTensor* bnGradOutputTensor = [mpsGraph reshapeTensor:gradOutputTensor
withShape:bn_shape
name:nil];
// Do this at the end
if(has_weight) {
MPSGraphTensor* bnGammaTensor = [mpsGraph reshapeTensor:weightTensor
withShape:bn_gamma_shape
name:nil];
bnGradOutputTensor = [mpsGraph multiplicationWithPrimaryTensor:bnGradOutputTensor
secondaryTensor:bnGammaTensor
name:nil];
}
MPSGraphTensor* bnMeanTensor = [mpsGraph reshapeTensor:meanTensor
withShape:bn_mean_shape
name:nil];
MPSGraphTensor* bnRstdTensor = [mpsGraph reshapeTensor:rstdTensor
withShape:bn_mean_shape
name:nil];
MPSGraphTensor* mulTensor = [mpsGraph constantWithScalar:N
shape:@[@1]
dataType:MPSDataTypeInt32];
MPSGraphTensor* numberToReduceTensor = mulTensor;
MPSGraphTensor* cast2Tensor = [mpsGraph castTensor:numberToReduceTensor
toType:bnInputTensor.dataType
name:@"cast2Tensor"];
MPSGraphTensor* sizeReciprocalTensor = [mpsGraph reciprocalWithTensor:cast2Tensor
name:nil];
// TODO: Reduce redundant computation
MPSGraphTensor* xMinusMean = [mpsGraph subtractionWithPrimaryTensor:bnInputTensor
secondaryTensor:bnMeanTensor
name:nil];
MPSGraphTensor* normalizedTensor = [mpsGraph multiplicationWithPrimaryTensor:xMinusMean
secondaryTensor:bnRstdTensor
name:nil];
MPSGraphTensor* bnGradMulTensor = [mpsGraph multiplicationWithPrimaryTensor:bnGradOutputTensor
secondaryTensor:normalizedTensor
name:nil];
MPSGraphTensor* gammaGradient = [mpsGraph reductionSumWithTensor:bnGradMulTensor
axes:bn_axes
name:nil];
MPSGraphTensor* betaGradient = [mpsGraph reductionSumWithTensor:bnGradOutputTensor
axes:bn_axes
name:nil];
MPSGraphTensor* gradient1 = [mpsGraph multiplicationWithPrimaryTensor:bnGradOutputTensor
secondaryTensor:bnRstdTensor
name:nil];
MPSGraphTensor* gradient2_1 = [mpsGraph multiplicationWithPrimaryTensor:sizeReciprocalTensor
secondaryTensor:xMinusMean
name:nil];
// reverseVariance is square of rstd
MPSGraphTensor* reverseVariance = [mpsGraph squareWithTensor:bnRstdTensor
name:nil];
MPSGraphTensor* gradient2_2 = [mpsGraph multiplicationWithPrimaryTensor:gammaGradient
secondaryTensor:reverseVariance
name:nil];
MPSGraphTensor* gradient2 = [mpsGraph multiplicationWithPrimaryTensor:gradient2_1
secondaryTensor:gradient2_2
name:nil];
MPSGraphTensor* gradient3_1 = [mpsGraph multiplicationWithPrimaryTensor:sizeReciprocalTensor
secondaryTensor:betaGradient
name:nil];
MPSGraphTensor* gradient3 = [mpsGraph multiplicationWithPrimaryTensor:gradient3_1
secondaryTensor:bnRstdTensor
name:nil];
MPSGraphTensor* gradient4 = [mpsGraph subtractionWithPrimaryTensor:gradient1
secondaryTensor:gradient2
name:nil];
MPSGraphTensor* gradient = [mpsGraph subtractionWithPrimaryTensor:gradient4
secondaryTensor:gradient3
name:nil];
gradInputTensor = [mpsGraph reshapeTensor:gradient
withShape:input_shape
name:nil];
}
if(grad_input_mask[1]) {
gradWeightTensor = [mpsGraph reshapeTensor:gradWeightTensor
withShape:gamma_shape
name:nil];
}
if(grad_input_mask[2]) {
gradBiasTensor = [mpsGraph reshapeTensor:gradBiasTensor
withShape:gamma_shape
name:nil];
}
newCachedGraph->gradOutputTensor_ = gradOutputTensor;
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->weightTensor_ = weightTensor;
newCachedGraph->meanTensor_ = meanTensor;
newCachedGraph->rstdTensor_ = rstdTensor;
newCachedGraph->gradInputTensor_ = gradInputTensor;
newCachedGraph->gradWeightTensor_ = gradWeightTensor;
newCachedGraph->gradBiasTensor_ = gradBiasTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
auto inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, *X);
auto gradOutputPlaceholder = native_mps::Placeholder(cachedGraph->gradOutputTensor_, *dOut);
auto weightPlaceholder = native_mps::Placeholder();
if(has_weight)
weightPlaceholder = native_mps::Placeholder(cachedGraph->weightTensor_, *gamma);
auto saveMeanPlaceholder = native_mps::Placeholder(cachedGraph->meanTensor_, mean);
auto saveVarPlaceholder = native_mps::Placeholder(cachedGraph->rstdTensor_, rstd);
auto gradInputPlaceholder = native_mps::Placeholder();
if(grad_input_mask[0])
gradInputPlaceholder = native_mps::Placeholder(cachedGraph->gradInputTensor_, grad_input);
auto gradWeightPlaceholder = native_mps::Placeholder();
if(grad_input_mask[1])
gradWeightPlaceholder = native_mps::Placeholder(cachedGraph->gradWeightTensor_, grad_weight);
auto gradBiasPlaceholder = native_mps::Placeholder();
if(grad_input_mask[2])
gradBiasPlaceholder = native_mps::Placeholder(cachedGraph->gradBiasTensor_, grad_bias);
NSMutableDictionary *feeds = [[NSMutableDictionary new] autorelease];
feeds[inputPlaceholder.getMPSGraphTensor()] = inputPlaceholder.getMPSGraphTensorData();
feeds[gradOutputPlaceholder.getMPSGraphTensor()] = gradOutputPlaceholder.getMPSGraphTensorData();
if(has_weight)
feeds[weightPlaceholder.getMPSGraphTensor()] = weightPlaceholder.getMPSGraphTensorData();
feeds[saveMeanPlaceholder.getMPSGraphTensor()] = saveMeanPlaceholder.getMPSGraphTensorData();
feeds[saveVarPlaceholder.getMPSGraphTensor()] = saveVarPlaceholder.getMPSGraphTensorData();
NSMutableDictionary *results = [[NSMutableDictionary new] autorelease];
if(grad_input_mask[0])
results[gradInputPlaceholder.getMPSGraphTensor()] = gradInputPlaceholder.getMPSGraphTensorData();
if(grad_input_mask[1])
results[gradWeightPlaceholder.getMPSGraphTensor()] = gradWeightPlaceholder.getMPSGraphTensorData();
if(grad_input_mask[2])
results[gradBiasPlaceholder.getMPSGraphTensor()] = gradBiasPlaceholder.getMPSGraphTensorData();
native_mps::runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
}
return std::make_tuple(std::move(grad_input), std::move(grad_weight), std::move(grad_bias));
}
} // namespace native
} // namespace at
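The gradient1 - gradient2 - gradient3 expression assembled above is the standard layer-norm input gradient for the no-weight case, dx = rstd * (dy - mean(dy) - xhat * mean(dy * xhat)) taken per normalization group. A CPU check against autograd:

import torch

x = torch.randn(5, 8, dtype=torch.double, requires_grad=True)
dy = torch.randn(5, 8, dtype=torch.double)
torch.nn.functional.layer_norm(x, (8,)).backward(dy)
mu = x.mean(dim=1, keepdim=True)
rstd = (x.var(dim=1, unbiased=False, keepdim=True) + 1e-5).rsqrt()
xhat = (x - mu) * rstd
dx = rstd * (dy - dy.mean(dim=1, keepdim=True)
             - xhat * (dy * xhat).mean(dim=1, keepdim=True))
print(torch.allclose(x.grad, dx.detach(), atol=1e-8))  # True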


@@ -15,26 +15,21 @@ Tensor& addc_mul_div_out_mps(const Tensor& self,
const bool is_div,
const string op_name)
{
using scalar_t = double;
scalar_t value_scalar = value_opt.to<scalar_t>();
if (&output != &self) {
output.resize_(output.sizes());
}
TORCH_CHECK(output.is_mps());
MPSStream* mpsStream = getCurrentMPSStream();
// Derive from MPSCachedGraph
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor = nil, *outputTensor = nil;
MPSGraphTensor *firstTensor = nil, *secondTensor = nil;
MPSGraphTensor *firstTensor = nil, *secondTensor = nil, *valueTensor = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
string key = op_name + to_string(value_scalar)
+ getTensorsStringKey({self, tensor1, tensor2})+ ":"
+ getMPSTypeString(value_opt.type());
string key = op_name + getTensorsStringKey({self, tensor1, tensor2}, false);
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
@@ -49,6 +44,7 @@ Tensor& addc_mul_div_out_mps(const Tensor& self,
newCachedGraph->inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
newCachedGraph->firstTensor = mpsGraphRankedPlaceHolder(mpsGraph, tensor1);
newCachedGraph->secondTensor = mpsGraphRankedPlaceHolder(mpsGraph, tensor2);
newCachedGraph->valueTensor = mpsGraphUnrankedPlaceHolder(mpsGraph, getMPSScalarType(self.scalar_type()));
// the tensor to be optionally multiplied by value_scalar
MPSGraphTensor *multiplicandTensor = nil;
@@ -62,15 +58,9 @@ Tensor& addc_mul_div_out_mps(const Tensor& self,
name:nil];
}
// the tensor to be added to input_tensor
MPSGraphTensor *addendTensor = multiplicandTensor;
// if value_scalar is 1.0, then we don't bother adding another multiply to graph
if (value_scalar != 1.0) {
MPSGraphTensor* valueTensor = [mpsGraph constantWithScalar:value_scalar
dataType:getMPSScalarType(value_opt.type())];
addendTensor = [mpsGraph multiplicationWithPrimaryTensor:multiplicandTensor
secondaryTensor:valueTensor
name:nil];
}
MPSGraphTensor *addendTensor = [mpsGraph multiplicationWithPrimaryTensor:multiplicandTensor
secondaryTensor:newCachedGraph->valueTensor
name:nil];
newCachedGraph->outputTensor = [mpsGraph additionWithPrimaryTensor:newCachedGraph->inputTensor
secondaryTensor:addendTensor
name:nil];
@@ -87,18 +77,18 @@ Tensor& addc_mul_div_out_mps(const Tensor& self,
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, output);
// Create dictionary of inputs and outputs
// Utility to dump out graph : [mpsGraph dump];
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData(),
tensor1Placeholder.getMPSGraphTensor() : tensor1Placeholder.getMPSGraphTensorData(),
tensor2Placeholder.getMPSGraphTensor() : tensor2Placeholder.getMPSGraphTensorData()
tensor2Placeholder.getMPSGraphTensor() : tensor2Placeholder.getMPSGraphTensorData(),
cachedGraph->valueTensor : getMPSGraphTensorFromScalar(mpsStream, value_opt, getMPSScalarType(self.scalar_type())),
};
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(getCurrentMPSStream(), cachedGraph->graph(), feeds, results);
runMPSGraph(mpsStream, cachedGraph->graph(), feeds, results);
}
return output;
}
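The point of the valueTensor placeholder is visible from Python: the scalar is now fed at run time, so sweeping `value` reuses one cached graph instead of compiling a fresh graph per scalar (previously the scalar was baked into both the cache key and a constant node). Assuming an MPS-enabled build:

import torch

t = torch.randn(4, device="mps")
t1 = torch.randn(4, device="mps")
t2 = torch.rand(4, device="mps") + 0.1
for v in (0.5, 1.0, 2.0):
    out = torch.addcdiv(t, t1, t2, value=v)  # same graph, different feed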
@@ -103,16 +103,9 @@ Tensor _mps_max_pool2d(
outputHeight, outputWidth, memory_format);
namespace native_mps = at::native::mps;
using CachedGraph = native_mps::MPSUnaryCachedGraph;
CheckedFrom c = "mps_max_pool2d";
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
Tensor output_t;
@ -161,7 +154,7 @@ Tensor _mps_max_pool2d(
to_string(padW) + ":" + to_string(padH) + ":" +
to_string(ceil_mode) + ":" + mem_format_key +
mps::getTensorsStringKey({input_t});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -711,7 +704,7 @@ TORCH_IMPL_FUNC(avg_pool2d_out_mps) (
to_string(ceil_mode) + ":" + mem_format_key + ":" +
to_string(divisor_override_value) +
mps::getTensorsStringKey({input});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
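Both pooling hunks swap the file-local CachedGraph struct for the shared `MPSUnaryCachedGraph` and the typed `LookUpAs<>` helper; the change is intended to be behavior-neutral. A quick shape check, assuming an MPS-enabled build:

```python
import torch
import torch.nn.functional as F

# Behavior-neutral refactor: pooling outputs keep their usual shapes.
if torch.backends.mps.is_available():
    x = torch.randn(1, 3, 8, 8, device="mps")
    assert F.max_pool2d(x, kernel_size=2).shape == (1, 3, 4, 4)
    assert F.avg_pool2d(x, kernel_size=2, ceil_mode=True).shape == (1, 3, 4, 4)
```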

View File

@ -13,6 +13,43 @@
namespace at {
namespace native {
namespace {
struct RangeCachedGraph : public mps::MPSCachedGraph {
API_AVAILABLE(macosx(12.3))
RangeCachedGraph(MPSGraph *mpsGraph, MPSDataType dataType, int32_t shapeVal, bool needsClamp = false, bool startLessEnd = false): MPSCachedGraph(mpsGraph) {
@autoreleasepool {
auto shapeTensor = [mpsGraph constantWithData:[NSData dataWithBytes:&shapeVal length:sizeof(int32_t)]
shape: @[@1]
dataType:MPSDataTypeInt32];
auto coordsTensor = [mpsGraph coordinateAlongAxis:0
withShapeTensor:shapeTensor
name:nil];
coordsTensor = [mpsGraph castTensor:coordsTensor toType:dataType name:@"coords"];
startTensor = mps::mpsGraphRankedPlaceHolder(mpsGraph, dataType, @[@1]);
multiplyTensor = mps::mpsGraphRankedPlaceHolder(mpsGraph, dataType, @[@1]);
auto scaledCoords = [mpsGraph multiplicationWithPrimaryTensor:coordsTensor
secondaryTensor:multiplyTensor
name:nil];
outputTensor = [mpsGraph additionWithPrimaryTensor:scaledCoords
secondaryTensor:startTensor
name:nil];
if (needsClamp) {
endTensor = mps::mpsGraphRankedPlaceHolder(mpsGraph, dataType, @[@1]);
outputTensor = [mpsGraph clampWithTensor:outputTensor
minValueTensor: startLessEnd? startTensor : endTensor
maxValueTensor: startLessEnd? endTensor : startTensor
name: nil];
}
}
}
MPSGraphTensor *startTensor = nil;
MPSGraphTensor *endTensor = nil;
MPSGraphTensor *multiplyTensor = nil;
MPSGraphTensor *outputTensor = nil;
};
} // anonymous namespace
Tensor& arange_mps_out(const Scalar& start, const Scalar& end, const Scalar& step, Tensor& result) {
AT_DISPATCH_MPS_TYPES(result.scalar_type(), "arange_mps", [&]() {
@ -53,8 +90,30 @@ Tensor& arange_mps_out(const Scalar& start, const Scalar& end, const Scalar& ste
}
bool is_contiguous = result.is_contiguous();
Tensor r = !is_contiguous ? at::empty_like(result, LEGACY_CONTIGUOUS_MEMORY_FORMAT) : result;
using namespace mps;
auto cache_ = MPSGraphCache::getInstance();
auto stream = getCurrentMPSStream();
auto mpsDataType = getMPSDataType(result.scalar_type());
@autoreleasepool {
string key = "arange_mps_out:" + getTensorsStringKey({result}) + ":" + to_string(size);
auto cachedGraph = static_cast<RangeCachedGraph *>(cache_->LookUp(key));
if (!cachedGraph) {
auto *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph *() {
auto mpsGraph = make_mps_graph();
return new RangeCachedGraph(mpsGraph, mpsDataType, size);
});
cachedGraph = static_cast<RangeCachedGraph *>(tmpCachedGraph);
}
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, r);
NSMutableDictionary *feeds = [[NSMutableDictionary new] autorelease];
feeds[cachedGraph->startTensor] = getMPSGraphTensorFromScalar(stream, start, mpsDataType);
feeds[cachedGraph->multiplyTensor] = getMPSGraphTensorFromScalar(stream, Scalar(step), mpsDataType);
// TODO: Add arange Metal kernel.
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
if(!is_contiguous) {
result.copy_(r);
@ -63,4 +122,69 @@ Tensor& arange_mps_out(const Scalar& start, const Scalar& end, const Scalar& ste
return result;
}
Tensor& linspace_out_mps(const Scalar& start, const Scalar& end, int64_t steps, Tensor& result) {
using namespace mps;
TORCH_CHECK(steps >= 0, "number of steps must be non-negative");
if (result.numel() != steps) {
result.resize_({steps});
}
if (steps == 0) {
// skip
} else if (steps == 1) {
result.fill_(start);
} else {
Tensor r = result.is_contiguous() ? result : result.contiguous();
// Do the MPSGraph computation
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
MPSStream* stream = getCurrentMPSStream();
bool start_less_end = (start.to<double>() <= end.to<double>());
@autoreleasepool {
string key = "linspace_out_mps:" + getTensorsStringKey({result}) + ":" + to_string(steps) + to_string(start_less_end);
RangeCachedGraph* cachedGraph = static_cast<RangeCachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
RangeCachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new RangeCachedGraph(mpsGraph, MPSDataTypeFloat32, steps, true, start_less_end);
if(getMPSDataType(result.scalar_type()) != MPSDataTypeFloat32) {
newCachedGraph->outputTensor = [mpsGraph castTensor:newCachedGraph->outputTensor toType:getMPSDataType(result.scalar_type()) name:@"output"];
}
}
return newCachedGraph;
});
cachedGraph = static_cast<RangeCachedGraph *>(tmpCachedGraph);
}
NSMutableDictionary *feeds = [[NSMutableDictionary new] autorelease];
auto multiplyScalar = (end.to<double>() - start.to<double>()) / ((double)steps - 1.0f);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, r);
// Create dictionary of inputs and outputs
feeds[cachedGraph->startTensor] = getMPSGraphTensorFromScalar(stream, start, MPSDataTypeFloat32);
feeds[cachedGraph->endTensor] = getMPSGraphTensorFromScalar(stream, end, MPSDataTypeFloat32);
feeds[cachedGraph->multiplyTensor] = getMPSGraphTensorFromScalar(stream, Scalar(multiplyScalar), MPSDataTypeFloat32);
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
if (!result.is_contiguous()) {
result.copy_(r);
}
}
return result;
}
}} // namespace at::native
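`RangeCachedGraph` computes `start + coordinate * step` on-device, and linspace reuses it with a clamp before casting from float32 to the requested dtype (its `multiplyTensor` feed is `(end - start) / (steps - 1)`). A sketch against the CPU reference, assuming an MPS-enabled build; float32 is used to stay clear of int64 limitations on MPS:

```python
import torch

if torch.backends.mps.is_available():
    a = torch.arange(0, 10, 2, dtype=torch.float32, device="mps")
    l = torch.linspace(0.0, 1.0, steps=5, device="mps")
    assert torch.allclose(a.cpu(), torch.arange(0, 10, 2, dtype=torch.float32))
    assert torch.allclose(l.cpu(), torch.linspace(0.0, 1.0, steps=5))
```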

View File

@ -13,13 +13,23 @@
namespace at {
namespace native {
using namespace std;
enum StdVarType {
STANDARD_VARIANCE,
STANDARD_DEVIATION
};
enum MPSReductionType {
MAX,
MIN,
AMAX,
AMIN,
SUM,
PROD,
MEAN,
COUNT_NONZERO
};
void set_apparent_shapes(NSMutableArray<NSNumber*> * &apparent_out_shape,
NSMutableArray<NSNumber*> * &apparent_in_shape,
int64_t num_reduce_dims,
@ -88,14 +98,51 @@ void set_axes(NSMutableArray<NSNumber *> * &axes,
}
}
// Helper function to prepare axes and tensor shapes
void set_axes_and_shapes(const Tensor& input_t,
IntArrayRef dims,
NSMutableArray<NSNumber*> * &axes,
NSMutableArray<NSNumber*> * &apparent_input_shape,
NSMutableArray<NSNumber*> * &apparent_output_shape,
NSMutableArray<NSNumber*> * &output_shape) {
IntArrayRef input_shape = input_t.sizes();
int64_t num_input_dims = input_shape.size();
int64_t num_reduce_dims = dims.size();
int64_t num_output_dims;
num_output_dims = num_reduce_dims == 0 ? 1 : num_input_dims;
// Reduction axes
set_axes(axes, num_reduce_dims, dims, input_shape.size());
// Shapes
set_apparent_shapes(apparent_output_shape,
apparent_input_shape,
num_reduce_dims,
num_input_dims,
num_output_dims,
input_shape,
axes);
// Squeeze dims for output shape
output_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:0];
for(int i=0; i < num_output_dims; i++) {
if([apparent_output_shape[i] longValue] != 1) {
[output_shape addObject:apparent_output_shape[i]];
}
}
}
void reduction_out_mps
(const Tensor& input_t,
IntArrayRef dim,
bool keepdim,
c10::optional<ScalarType> dtype,
const Tensor& output_t,
string reduction_type,
string func_name) {
MPSReductionType reduction_type,
const std::string& func_name) {
IntArrayRef input_shape = input_t.sizes();
@ -107,36 +154,14 @@ void reduction_out_mps
namespace native_mps = at::native::mps;
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
int64_t num_input_dims = input_shape.size();
int64_t num_reduce_dims = dim.size();
int64_t num_output_dims;
// For output shape calculation, assume that keepdim is true
num_output_dims = num_input_dims;
NSMutableArray<NSNumber*> *apparent_output_shape = nil;
NSMutableArray<NSNumber*> *axes = nil;
NSMutableArray<NSNumber*> *apparent_input_shape = nil;
NSMutableArray<NSNumber*> *apparent_output_shape = nil;
NSMutableArray<NSNumber*> *output_shape = nil;
// Reduction axes
NSMutableArray<NSNumber *> *axes;
set_axes(axes, num_reduce_dims, dim, input_shape.size());
set_axes_and_shapes(input_t, dim, axes, apparent_input_shape, apparent_output_shape, output_shape);
set_apparent_shapes(apparent_output_shape,
apparent_input_shape,
num_reduce_dims,
num_input_dims,
num_output_dims,
input_shape,
axes);
auto cache_ = native_mps::MPSGraphCache::getInstance();
if (output_t.numel() == 0 || input_t.numel() == 0) {
return;
@ -149,7 +174,8 @@ void reduction_out_mps
// TODO: Make this key proper
NSString* ns_key = [[axes valueForKey:@"description"] componentsJoinedByString:@","];
string key = func_name+":" + string([ns_key UTF8String]) + ":" + native_mps::getMPSTypeString(input_t.scalar_type()) + ":" + native_mps::getMPSTypeString(output_t.scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
using CachedGraph = native_mps::MPSUnaryCachedGraph;
auto cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -173,22 +199,43 @@ void reduction_out_mps
MPSGraphTensor* castOutputTensor = nil;
if(reduction_type == "sum")
if(reduction_type == MPSReductionType::SUM) {
castOutputTensor = [mpsGraph reductionSumWithTensor:castInputTensor
axes:axes
name:nil];
else if(reduction_type == "prod")
} else if(reduction_type == MPSReductionType::PROD) {
castOutputTensor = [mpsGraph reductionProductWithTensor:castInputTensor
axes:axes
name:nil];
else if(reduction_type == "mean")
} else if(reduction_type == MPSReductionType::MEAN) {
castOutputTensor = [mpsGraph meanOfTensor:inputTensor
axes:axes
name:nil];
} else if(reduction_type == MPSReductionType::COUNT_NONZERO) {
MPSGraphTensor* zeros = [mpsGraph constantWithScalar:0
dataType:castInputTensor.dataType];
MPSGraphTensor* nonZeros = [mpsGraph notEqualWithPrimaryTensor:castInputTensor
secondaryTensor:zeros
name:nil];
castOutputTensor = [mpsGraph reductionSumWithTensor:nonZeros
axes:axes
name:nil];
}
else if(reduction_type == MPSReductionType::AMAX) {
castOutputTensor = [mpsGraph reductionMaximumWithTensor:inputTensor
axes:axes
name:nil];
} else if(reduction_type == MPSReductionType::AMIN) {
castOutputTensor = [mpsGraph reductionMinimumWithTensor:inputTensor
axes:axes
name:nil];
}
MPSGraphTensor* outputTensor = nil;
if(input_t.scalar_type() != ScalarType::Float)
if(output_t.scalar_type() != ScalarType::Float)
outputTensor = [mpsGraph castTensor:castOutputTensor
toType:(native_mps::getMPSDataType(output_t.scalar_type()))
name:@"outputTensor"];
@ -200,7 +247,7 @@ void reduction_out_mps
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<CachedGraph>();
}
auto inputPlaceholder = native_mps::Placeholder();
@ -229,7 +276,7 @@ TORCH_IMPL_FUNC(sum_out_mps)
c10::optional<ScalarType> dtype,
const Tensor& output_t) {
reduction_out_mps(input_t, dim, keepdim, dtype, output_t, "sum", "sum_out_mps");
reduction_out_mps(input_t, dim, keepdim, dtype, output_t, MPSReductionType::SUM, "sum_out_mps");
}
TORCH_IMPL_FUNC(prod_out_mps)
@ -241,7 +288,7 @@ TORCH_IMPL_FUNC(prod_out_mps)
int64_t dims[1] = {dim};
reduction_out_mps(input_t, IntArrayRef(dims, 1), keepdim, dtype, output_t, "prod", "prod_out_mps");
reduction_out_mps(input_t, IntArrayRef(dims, 1), keepdim, dtype, output_t, MPSReductionType::PROD, "prod_out_mps");
}
// Taken from ReduceOps.cpp
@ -259,6 +306,24 @@ inline ScalarType get_dtype_from_self(
return src_type;
}
TORCH_IMPL_FUNC(amax_out_mps)
(const Tensor& input_t,
IntArrayRef dim,
bool keepdim,
const Tensor& output_t) {
reduction_out_mps(input_t, dim, keepdim, c10::nullopt, output_t, MPSReductionType::AMAX, "amax_out_mps");
}
TORCH_IMPL_FUNC(amin_out_mps)
(const Tensor& input_t,
IntArrayRef dim,
bool keepdim,
const Tensor& output_t) {
reduction_out_mps(input_t, dim, keepdim, c10::nullopt, output_t, MPSReductionType::AMIN, "amin_out_mps");
}
Tensor prod_mps(const Tensor &self, c10::optional<ScalarType> opt_dtype) {
auto num_dims = self.dim();
@ -276,7 +341,36 @@ Tensor prod_mps(const Tensor &self, c10::optional<ScalarType> opt_dtype) {
c10::nullopt,
c10::nullopt);
reduction_out_mps(self, IntArrayRef(dims, num_dims), false, opt_dtype, const_cast<Tensor&>(output_t), "prod", "prod_mps");
reduction_out_mps(self, IntArrayRef(dims, num_dims), false, opt_dtype, const_cast<Tensor&>(output_t), MPSReductionType::PROD, "prod_mps");
return output_t;
}
Tensor count_nonzero_mps(const Tensor& self, IntArrayRef dims){
NSMutableArray<NSNumber*> *axes = nil;
NSMutableArray<NSNumber*> *apparent_input_shape = nil;
NSMutableArray<NSNumber*> *apparent_output_shape = nil;
NSMutableArray<NSNumber*> *output_shape = nil;
set_axes_and_shapes(self, dims, axes, apparent_input_shape, apparent_output_shape, output_shape);
int64_t* raw_output_shape = (int64_t *)malloc([output_shape count] * sizeof(int64_t));
for(int i=0; i < [output_shape count]; i++) {
raw_output_shape[i] = [output_shape[i] longValue];
}
Tensor output_t = at::native::empty_mps(
IntArrayRef(raw_output_shape, [output_shape count]),
ScalarType::Long,
c10::nullopt,
kMPS,
c10::nullopt,
c10::nullopt);
reduction_out_mps(self, dims, false, self.scalar_type(), const_cast<Tensor&>(output_t), MPSReductionType::COUNT_NONZERO, "count_nonzero_mps");
free(raw_output_shape);
return output_t;
}
@ -288,135 +382,7 @@ TORCH_IMPL_FUNC(mean_out_mps)
c10::optional<ScalarType> dtype,
const Tensor& output_t) {
reduction_out_mps(input_t, dim, keepdim, dtype, output_t, "mean", "mean_out_mps");
}
TORCH_IMPL_FUNC(argmax_out_mps)
(const Tensor& input_t,
c10::optional<int64_t> dim,
bool keepdim,
const Tensor& output_t) {
namespace native_mps = at::native::mps;
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
int64_t dim_;
if (dim.has_value()) {
dim_ = maybe_wrap_dim(dim.value(), input_t.dim());
native::zero_numel_check_dims(input_t, dim_, "argmax()");
} else {
TORCH_CHECK_INDEX(
input_t.numel() != 0,
"argmax()", ": Expected reduction dim to be specified for input.numel() == 0.");
// Since input will be flattened, take argmax along 0'th dimension
dim_ = 0;
}
// Calculate the output shape according to keepdim=True
// If there is no dim argument, the input shape is flattened
IntArrayRef input_shape = input_t.sizes();
int64_t num_input_dims = input_shape.size();
NSMutableArray<NSNumber*> *apparent_in_shape = nil;
NSMutableArray<NSNumber*> *apparent_out_shape = nil;
if(dim.has_value()) {
apparent_out_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:num_input_dims];
for(int i = 0; i < num_input_dims; i++) {
if(dim_ == i)
apparent_out_shape[i] = @1;
else
apparent_out_shape[i] = [NSNumber numberWithInt:input_shape[i]];
}
}
else {
apparent_in_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:1];
int64_t num_in_elements = 1;
for(int i = 0; i < num_input_dims; i++) {
num_in_elements *= input_shape[i];
}
apparent_in_shape[0] = [NSNumber numberWithInt:num_in_elements];
apparent_out_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:1];
apparent_out_shape[0] = @1;
}
if (output_t.numel() == 0) {
return;
}
auto stream = at::mps::getCurrentMPSStream();
@autoreleasepool {
string key = "argmax_out_mps:" + to_string(dim_) + ":" + native_mps::getMPSTypeString(input_t.scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = native_mps::make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = native_mps::mpsGraphUnrankedPlaceHolder(mpsGraph, native_mps::getMPSDataType(input_t.scalar_type()));
MPSGraphTensor* castInputTensor = nil;
if(input_t.scalar_type() != ScalarType::Float &&
input_t.scalar_type() != ScalarType::Int &&
input_t.scalar_type() != ScalarType::Half)
castInputTensor = [mpsGraph castTensor:inputTensor
toType:MPSDataTypeFloat32
name:@"castInputTensor"];
else
castInputTensor = inputTensor;
MPSGraphTensor* argmaxOutTensor = [mpsGraph reductionArgMaximumWithTensor:castInputTensor
axis:(NSInteger)dim_
name:@"argmax_out"];
MPSGraphTensor* outputTensor = [mpsGraph castTensor:argmaxOutTensor
toType:MPSDataTypeInt64
name:@"cast_out"];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
native_mps::Placeholder inputPlaceholder = native_mps::Placeholder();
if(apparent_in_shape)
inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, input_t, apparent_in_shape);
else
inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, input_t);
auto outputPlaceholder = native_mps::Placeholder(cachedGraph->outputTensor_, output_t, apparent_out_shape);
NSDictionary<MPSGraphTensor *, MPSGraphTensorData *> *feeds = @{
inputPlaceholder.getMPSGraphTensor() : inputPlaceholder.getMPSGraphTensorData(),
};
NSDictionary<MPSGraphTensor *, MPSGraphTensorData *> *results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
native_mps::runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
reduction_out_mps(input_t, dim, keepdim, dtype, output_t, MPSReductionType::MEAN, "mean_out_mps");
}
TORCH_IMPL_FUNC(norm_out_mps)
@ -438,13 +404,7 @@ TORCH_IMPL_FUNC(norm_out_mps)
namespace native_mps = at::native::mps;
CheckedFrom c = "norm_out_mps";
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
using CachedGraph = native_mps::MPSUnaryCachedGraph;
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
@ -485,7 +445,7 @@ TORCH_IMPL_FUNC(norm_out_mps)
string keepdim_info = (keepdim) ? "keepdim=1" : "keepdim=0";
string key = string("norm_out_mps:") + [ns_key UTF8String] + ":" + native_mps::getMPSTypeString(input_t.scalar_type()) + ":p" + to_string(p) + ":" + keepdim_info;
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
auto cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -558,7 +518,7 @@ TORCH_IMPL_FUNC(norm_out_mps)
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<CachedGraph>();
}
auto inputPlaceholder = native_mps::Placeholder();
@ -592,6 +552,7 @@ Tensor std_var_common_impl_mps(
StdVarType stdVarType)
{
namespace native_mps = at::native::mps;
using CachedGraph = native_mps::MPSUnaryCachedGraph;
IntArrayRef input_shape = input_t.sizes();
int64_t num_input_dims = input_shape.size();
@ -613,15 +574,6 @@ Tensor std_var_common_impl_mps(
const auto correction_value = use_correction ? correction.value() : false;
int64_t correction_n = 1;
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
int64_t num_output_dims = 0;
@ -765,9 +717,9 @@ Tensor std_var_common_impl_mps(
string bessel_corrected = (use_correction && correction_value) ? "unbiased " : "biased ";
string use_dim_info = (use_dim) ? "use_dim=1:" + to_string(dim_value.size()) : "use_dim=0";
string keepdim_info = (keepdim) ? "keepdim=1" : "keepdim=0";
string key = op_key + use_dim_info + ":" + keepdim_info + ":" + string([ns_key UTF8String]) + ":" + native_mps::getMPSTypeString(input_t.scalar_type()) + ":" + bessel_corrected;
string key = op_key + use_dim_info + ":" + keepdim_info + ":" + string([ns_key UTF8String]) + ":" + native_mps::getTensorsStringKey(input_t) + ":" + bessel_corrected;
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
auto cachedGraph = cache_->LookUpAs<CachedGraph>(key);
// Initialize once if configuration not found in cache
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -857,19 +809,12 @@ TORCH_IMPL_FUNC(any_out_mps)
const Tensor& output_t)
{
namespace native_mps = at::native::mps;
using CachedGraph = native_mps::MPSUnaryCachedGraph;
if (output_t.numel() == 0 || input_t.numel() == 0) {
return;
}
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
int64_t dim_ = maybe_wrap_dim(dim, input_t.dim());
native::zero_numel_check_dims(input_t, dim_, "any()");
@ -892,7 +837,7 @@ TORCH_IMPL_FUNC(any_out_mps)
@autoreleasepool {
MPSShape* input_t_shape = native_mps::getMPSShape(input_t);
string key = string("any_out_mps:") + native_mps::getMPSShapeString(input_t_shape) + ":" + to_string(dim_) + ":" + native_mps::getMPSTypeString(input_t.scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -935,7 +880,7 @@ TORCH_IMPL_FUNC(any_out_mps)
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<CachedGraph>();
}
auto inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, input_t);
@ -955,26 +900,19 @@ TORCH_IMPL_FUNC(any_out_mps)
TORCH_IMPL_FUNC(any_all_out_mps)(const Tensor& input_t, const Tensor& output_t)
{
namespace native_mps = at::native::mps;
using CachedGraph = native_mps::MPSUnaryCachedGraph;
if (output_t.numel() == 0 || input_t.numel() == 0) {
return;
}
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
auto cache_ = native_mps::MPSGraphCache::getInstance();
auto stream = at::mps::getCurrentMPSStream();
@autoreleasepool {
MPSShape* input_t_shape = native_mps::getMPSShape(input_t);
string key = string("any_all_out_mps:") + native_mps::getMPSShapeString(input_t_shape) +":" + native_mps::getMPSTypeString(input_t.scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -1042,19 +980,12 @@ TORCH_IMPL_FUNC(all_out_mps)
const Tensor& output_t)
{
namespace native_mps = at::native::mps;
using CachedGraph = native_mps::MPSUnaryCachedGraph;
if (output_t.numel() == 0 || input_t.numel() == 0) {
return;
}
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
int64_t dim_ = maybe_wrap_dim(dim, input_t.dim());
native::zero_numel_check_dims(input_t, dim_, "all()");
@ -1077,7 +1008,7 @@ TORCH_IMPL_FUNC(all_out_mps)
@autoreleasepool {
MPSShape* input_t_shape = native_mps::getMPSShape(input_t);
string key = string("all_out_mps:") + native_mps::getMPSShapeString(input_t_shape) + ":" + to_string(dim_) + ":" + native_mps::getMPSTypeString(input_t.scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -1120,7 +1051,7 @@ TORCH_IMPL_FUNC(all_out_mps)
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<CachedGraph>();
}
auto inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, input_t);
@ -1140,18 +1071,11 @@ TORCH_IMPL_FUNC(all_out_mps)
TORCH_IMPL_FUNC(all_all_out_mps)(const Tensor& input_t, const Tensor& output_t)
{
namespace native_mps = at::native::mps;
using CachedGraph = native_mps::MPSUnaryCachedGraph;
if (output_t.numel() == 0 || input_t.numel() == 0) {
return;
}
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
auto stream = at::mps::getCurrentMPSStream();
@ -1159,7 +1083,7 @@ TORCH_IMPL_FUNC(all_all_out_mps)(const Tensor& input_t, const Tensor& output_t)
@autoreleasepool {
MPSShape* input_t_shape = native_mps::getMPSShape(input_t);
string key = string("all_all_out_mps:") + native_mps::getMPSShapeString(input_t_shape) +":" + native_mps::getMPSTypeString(input_t.scalar_type());
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -1203,7 +1127,7 @@ TORCH_IMPL_FUNC(all_all_out_mps)(const Tensor& input_t, const Tensor& output_t)
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<CachedGraph>();
}
auto inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, input_t);
@ -1225,18 +1149,11 @@ TORCH_IMPL_FUNC(all_all_out_mps)(const Tensor& input_t, const Tensor& output_t)
Tensor min_max_mps
(const Tensor& input_t,
string reduction_type,
string func_name) {
MPSReductionType reduction_type,
const std::string& func_name) {
namespace native_mps = at::native::mps;
// Derive from MPSCachedGraph
struct CachedGraph : public native_mps::MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor_ = nil;
MPSGraphTensor *outputTensor_ = nil;
};
using CachedGraph = native_mps::MPSUnaryCachedGraph;
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
@ -1259,7 +1176,7 @@ Tensor min_max_mps
@autoreleasepool {
string key = func_name + mps::getTensorsStringKey(input_t);
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
// Initialize once if configuration not found in cache
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
@ -1274,11 +1191,11 @@ Tensor min_max_mps
MPSGraphTensor* outputTensor = nil;
if(reduction_type == "max")
if(reduction_type == MPSReductionType::MAX)
outputTensor = [mpsGraph reductionMaximumWithTensor:inputTensor
axes:@[@0]
name:nil];
else if(reduction_type == "min")
else if(reduction_type == MPSReductionType::MIN)
outputTensor = [mpsGraph reductionMinimumWithTensor:inputTensor
axes:@[@0]
name:nil];
@ -1312,13 +1229,13 @@ Tensor min_max_mps
// Max entire tensor into scalar result
Tensor max_mps(const Tensor& input_t) {
return min_max_mps(input_t, "max", "max_mps");
return min_max_mps(input_t, MPSReductionType::MAX, "max_mps");
}
// Min entire tensor into scalar result
Tensor min_mps(const Tensor& input_t) {
return min_max_mps(input_t, "min", "min_mps");
return min_max_mps(input_t, MPSReductionType::MIN, "min_mps");
}
void min_max_out_mps
@ -1327,8 +1244,8 @@ void min_max_out_mps
bool keepdim,
const Tensor& output_t,
const Tensor& indices_t,
string reduction_type,
string func_name) {
MPSReductionType reduction_type,
const std::string& func_name) {
namespace native_mps = at::native::mps;
@ -1386,11 +1303,11 @@ void min_max_out_mps
MPSGraphTensor* inputTensor = native_mps::mpsGraphUnrankedPlaceHolder(mpsGraph, native_mps::getMPSDataType(input_t.scalar_type()));
MPSGraphTensor* outputTensor = nil;
if(reduction_type == "max")
if(reduction_type == MPSReductionType::MAX)
outputTensor = [mpsGraph reductionMaximumWithTensor:inputTensor
axis:(NSInteger)dim_
name:nil];
else if(reduction_type == "min")
else if(reduction_type == MPSReductionType::MIN)
outputTensor = [mpsGraph reductionMinimumWithTensor:inputTensor
axis:(NSInteger)dim_
name:nil];
@ -1407,11 +1324,11 @@ void min_max_out_mps
castInputTensor = inputTensor;
MPSGraphTensor* argreduceOutTensor = nil;
if(reduction_type == "max")
if(reduction_type == MPSReductionType::MAX)
argreduceOutTensor = [mpsGraph reductionArgMaximumWithTensor:castInputTensor
axis:(NSInteger)dim_
name:@"argmax_out"];
else if(reduction_type == "min")
else if(reduction_type == MPSReductionType::MIN)
argreduceOutTensor = [mpsGraph reductionArgMinimumWithTensor:castInputTensor
axis:(NSInteger)dim_
name:@"argmax_out"];
@ -1459,7 +1376,7 @@ TORCH_IMPL_FUNC(max_out_mps)
int64_t dim_ = maybe_wrap_dim(dim, input_t.dim());
native::zero_numel_check_dims(input_t, dim_, "max()");
min_max_out_mps(input_t, dim, keepdim, output_t, indices_t, "max", "max_out_mps");
min_max_out_mps(input_t, dim, keepdim, output_t, indices_t, MPSReductionType::MAX, "max_out_mps");
}
// Min out with dim
@ -1473,16 +1390,163 @@ TORCH_IMPL_FUNC(min_out_mps)
int64_t dim_ = maybe_wrap_dim(dim, input_t.dim());
native::zero_numel_check_dims(input_t, dim_, "min()");
min_max_out_mps(input_t, dim, keepdim, output_t, indices_t, "min", "min_out_mps");
min_max_out_mps(input_t, dim, keepdim, output_t, indices_t, MPSReductionType::MIN, "min_out_mps");
}
void argmax_argmin_out_mps
(const Tensor& input_t,
c10::optional<int64_t> dim,
bool keepdim,
const Tensor& output_t,
MPSReductionType reduction_type,
const std::string& func_name) {
namespace native_mps = at::native::mps;
using CachedGraph = native_mps::MPSUnaryCachedGraph;
native_mps::MPSGraphCache* cache_ = native_mps::MPSGraphCache::getInstance();
int64_t dim_;
if (dim.has_value()) {
dim_ = maybe_wrap_dim(dim.value(), input_t.dim());
zero_numel_check_dims(input_t, dim_, reduction_type == MPSReductionType::MAX ? "argmax()" : "argmin()");
} else {
TORCH_CHECK_INDEX(
input_t.numel() != 0,
reduction_type == MPSReductionType::MAX ? "argmax()" : "argmin()" , ": Expected reduction dim to be specified for input.numel() == 0.");
// Since input will be flattened, take argmax or argmin along 0'th dimension
dim_ = 0;
}
// Calculate the output shape according to keepdim=True
// If there is no dim argument, the input shape is flattened
IntArrayRef input_shape = input_t.sizes();
int64_t num_input_dims = input_shape.size();
NSMutableArray<NSNumber*> *apparent_in_shape = nil;
NSMutableArray<NSNumber*> *apparent_out_shape = nil;
if(dim.has_value()) {
apparent_out_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:num_input_dims];
for(int i = 0; i < num_input_dims; i++) {
if(dim_ == i)
apparent_out_shape[i] = @1;
else
apparent_out_shape[i] = [NSNumber numberWithInt:input_shape[i]];
}
}
else {
apparent_in_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:1];
int64_t num_in_elements = 1;
for(int i = 0; i < num_input_dims; i++) {
num_in_elements *= input_shape[i];
}
apparent_in_shape[0] = [NSNumber numberWithInt:num_in_elements];
apparent_out_shape = [NSMutableArray<NSNumber*> arrayWithCapacity:1];
apparent_out_shape[0] = @1;
}
if (output_t.numel() == 0) {
return;
}
auto stream = at::mps::getCurrentMPSStream();
@autoreleasepool {
string key = func_name + to_string(dim_) + ":" + native_mps::getTensorsStringKey(input_t);
CachedGraph* cachedGraph = cache_->LookUpAs<CachedGraph>(key);
if(!cachedGraph) {
native_mps::MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ native_mps::MPSCachedGraph * () {
CachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = native_mps::make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
MPSGraphTensor* inputTensor = native_mps::mpsGraphUnrankedPlaceHolder(mpsGraph, native_mps::getMPSDataType(input_t.scalar_type()));
MPSGraphTensor* castInputTensor = nil;
MPSGraphTensor* argreduceOutTensor = nil;
if(input_t.scalar_type() != ScalarType::Float &&
input_t.scalar_type() != ScalarType::Int &&
input_t.scalar_type() != ScalarType::Half)
castInputTensor = [mpsGraph castTensor:inputTensor
toType:MPSDataTypeFloat32
name:@"castInputTensor"];
else
castInputTensor = inputTensor;
if (reduction_type == MPSReductionType::MAX) {
argreduceOutTensor = [mpsGraph reductionArgMaximumWithTensor:castInputTensor
axis:(NSInteger)dim_
name:nil];
}
else {
argreduceOutTensor = [mpsGraph reductionArgMinimumWithTensor:castInputTensor
axis:(NSInteger)dim_
name:nil];
}
MPSGraphTensor* outputTensor = [mpsGraph castTensor:argreduceOutTensor
toType:MPSDataTypeInt64
name:@"castOutpuTensor"];
newCachedGraph->inputTensor_ = inputTensor;
newCachedGraph->outputTensor_ = outputTensor;
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
}
native_mps::Placeholder inputPlaceholder = native_mps::Placeholder();
if(apparent_in_shape)
inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, input_t, apparent_in_shape);
else
inputPlaceholder = native_mps::Placeholder(cachedGraph->inputTensor_, input_t);
auto outputPlaceholder = native_mps::Placeholder(cachedGraph->outputTensor_, output_t, apparent_out_shape);
NSDictionary<MPSGraphTensor *, MPSGraphTensorData *> *feeds = @{
inputPlaceholder.getMPSGraphTensor() : inputPlaceholder.getMPSGraphTensorData(),
};
NSDictionary<MPSGraphTensor *, MPSGraphTensorData *> *results = @{
outputPlaceholder.getMPSGraphTensor() : outputPlaceholder.getMPSGraphTensorData()
};
native_mps::runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
}
TORCH_IMPL_FUNC(argmax_out_mps)
(const Tensor& input_t,
c10::optional<int64_t> dim,
bool keepdim,
const Tensor& output_t) {
argmax_argmin_out_mps(input_t, dim, keepdim, output_t, MPSReductionType::MAX, "argmax_out_mps");
}
TORCH_IMPL_FUNC(argmin_out_mps)
(const Tensor& input_t,
c10::optional<int64_t> dim,
bool keepdim,
const Tensor& output_t) {
argmax_argmin_out_mps(input_t, dim, keepdim, output_t, MPSReductionType::MIN, "argmin_out_mps");
}
// Min/Max with dim
std::tuple<Tensor, Tensor> min_max_mps
(const Tensor& input_t,
int64_t dim,
bool keepdim,
string reduction_type,
string func_name) {
MPSReductionType reduction_type,
const std::string& func_name) {
namespace native_mps = at::native::mps;
@ -1570,7 +1634,7 @@ std::tuple<Tensor, Tensor> max_mps
int64_t dim,
bool keepdim) {
return min_max_mps(input_t, dim, keepdim, "max", "max_mps");
return min_max_mps(input_t, dim, keepdim, MPSReductionType::MAX, "max_mps");
}
// Min with dim
@ -1579,9 +1643,8 @@ std::tuple<Tensor, Tensor> min_mps
int64_t dim,
bool keepdim) {
return min_max_mps(input_t, dim, keepdim, "min", "min_mps");
return min_max_mps(input_t, dim, keepdim, MPSReductionType::MIN, "min_mps");
}
}
}
} // native
} // at
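With the string-typed `reduction_type` replaced by the `MPSReductionType` enum, the same `reduction_out_mps` entry point now also routes the new `amax`/`amin`/`count_nonzero` reductions, and argmax/argmin share one helper. A smoke test over the newly routed ops, assuming an MPS-enabled build:

```python
import torch

if torch.backends.mps.is_available():
    x = torch.tensor([[1.0, 0.0, 3.0], [0.0, 5.0, 0.0]], device="mps")
    print(torch.amax(x, dim=1))           # MPSReductionType::AMAX
    print(torch.amin(x, dim=1))           # MPSReductionType::AMIN
    print(torch.argmin(x, dim=1))         # argmax_argmin_out_mps
    print(torch.count_nonzero(x, dim=0))  # MPSReductionType::COUNT_NONZERO
```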

View File

@ -74,7 +74,6 @@ Tensor repeat_mps(const Tensor& self, IntArrayRef repeats) {
TORCH_CHECK(repeats.size() >= (size_t)self.dim(),
"Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor");
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}

View File

@ -153,6 +153,13 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor> _lstm_mps(const Tensor& input
name:nil]];
}
MPSGraphTensor* outputTensor = [outputs objectAtIndex:0];
if (batch_first) {
outputTensor = [mpsGraph transposeTensor:outputTensor
dimension:0
withDimension:1
name:nil];
}
MPSGraphTensor* outputStates = [mpsGraph concatTensors:outputStateArray
dimension:0
name:nil];
@ -166,7 +173,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor> _lstm_mps(const Tensor& input
dimension:0
name:nil];
std::vector<MPSGraphTensor*> outputTensors = {[outputs objectAtIndex:0], outputStates, outputCellStates, outputZStates, outputCellStatesFwd};
std::vector<MPSGraphTensor*> outputTensors = {outputTensor, outputStates, outputCellStates, outputZStates, outputCellStatesFwd};
newCachedGraph->inputTensors_ = inputTensors;
newCachedGraph->outputTensors_ = outputTensors;
newCachedGraph->kernelWeightsList_ = kernelWeightsList;
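The added transpose makes the primary LSTM output honor `batch_first` instead of always returning the sequence-major tensor. A shape-contract sketch, assuming an MPS-enabled build:

```python
import torch

if torch.backends.mps.is_available():
    lstm = torch.nn.LSTM(input_size=8, hidden_size=16, batch_first=True).to("mps")
    x = torch.randn(4, 10, 8, device="mps")   # (batch, seq, feature)
    out, _ = lstm(x)
    assert out.shape == (4, 10, 16)            # batch-first after the fix
```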

View File

@ -300,6 +300,8 @@ TORCH_IMPL_FUNC(topk_out_mps)
k >= 0 && k <= (self.dim() > 0 ? self.size(dim) : 1),
"selected index k out of range");
TORCH_CHECK( k <= 16 , "Currently topk on mps works only for k<=16 ");
if (self.dim() == 0 && self.numel() == 1)
{
values.copy_(self);
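The new branch short-circuits topk for a 0-dimensional input by copying `self` straight into `values`. A hedged example of the case it covers (note the separate k <= 16 limit on MPS just above):

```python
import torch

if torch.backends.mps.is_available():
    s = torch.tensor(3.0, device="mps")     # 0-dim input
    values, indices = torch.topk(s, k=1)    # handled by the new branch
    assert values.item() == 3.0 and indices.item() == 0
```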

View File

@ -13,38 +13,38 @@ namespace mps {
typedef MPSGraphTensor* (^UnaryOpBlock)(MPSGraph*, MPSGraphTensor*);
void unary_op(const Tensor& self_t, const Tensor& output, std::string op_name, UnaryOpBlock unaryBlock)
void unary_op(const Tensor& self, const Tensor& output, std::string op_name, UnaryOpBlock unaryBlock)
{
Tensor self = self_t.contiguous(at::MemoryFormat::Contiguous);
TORCH_CHECK_TYPE(self.scalar_type() != ScalarType::Long, "Operation '", op_name, "()' does not support input type 'int64' in MPS backend.");
if (!output.is_same_size(self)) {
output.resize_(self.sizes());
}
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor = nil, *outputTensor = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
string key = op_name + getTensorsStringKey({self});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
string key = op_name + getTensorsStringKey({self}, /*use_scalar_value*/ false);
auto cachedGraph = cache_->LookUpAs<MPSUnaryCachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph* () {
CachedGraph *newCachedGraph = nil;
MPSUnaryCachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
newCachedGraph->inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
newCachedGraph->outputTensor = unaryBlock(mpsGraph, newCachedGraph->inputTensor);
newCachedGraph = new MPSUnaryCachedGraph(mpsGraph);
newCachedGraph->inputTensor_ = mpsGraphRankedPlaceHolder(mpsGraph, self);
MPSGraphTensor* castTensor = newCachedGraph->inputTensor_;
// Integer input must be cast to float if output is float
if (isIntegralType(self.scalar_type()) && isFloatingType(output.scalar_type())) {
castTensor = castMPSTensor(mpsGraph, newCachedGraph->inputTensor_, output.scalar_type());
}
newCachedGraph->outputTensor_ = unaryBlock(mpsGraph, castTensor);
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<MPSUnaryCachedGraph>();
}
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor, self);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, output);
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, output);
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData()
};
@ -121,45 +121,47 @@ CREATE_MPS_STRUCTURED_UNARY_TORCH_IMPL_FUNC(atanh_out_mps, atanh)
CREATE_MPS_UNARY_TORCH_IMPL_FUNC(abs_out_mps, absolute)
Tensor& logical_not_out_mps(const Tensor& self, Tensor& output)
{
auto bool_self = self.to(ScalarType::Bool);
mps::unary_op(bool_self, output, "logical_not_out_mps", [](MPSGraph* mpsGraph, MPSGraphTensor* inputTensor){ return [mpsGraph notWithTensor:inputTensor name:nil];});
return output;
}
TORCH_IMPL_FUNC(log1p_out_mps) (const Tensor& self, const Tensor& output)
{
using namespace mps;
if (!output.is_same_size(self)) {
output.resize_(self.sizes());
}
struct CachedGraph : public MPSCachedGraph
{
CachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor *inputTensor = nil, *outputTensor = nil;
};
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
string key = string("log1p_out_mps") + getTensorsStringKey({self});
CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
auto cachedGraph = cache_->LookUpAs<MPSUnaryCachedGraph>(key);
if(!cachedGraph) {
MPSCachedGraph *tmpCachedGraph = cache_->CreateCachedGraph(key, ^ MPSCachedGraph* () {
CachedGraph *newCachedGraph = nil;
MPSUnaryCachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new CachedGraph(mpsGraph);
newCachedGraph->inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, self);
newCachedGraph = new MPSUnaryCachedGraph(mpsGraph);
newCachedGraph->inputTensor_ = mpsGraphRankedPlaceHolder(mpsGraph, self);
MPSGraphTensor* oneTensor = [mpsGraph constantWithScalar:1.0
shape:getMPSShape(self)
dataType:mps::getMPSDataType(self.scalar_type())];
MPSGraphTensor* addedTensor = [mpsGraph additionWithPrimaryTensor:newCachedGraph->inputTensor
MPSGraphTensor* addedTensor = [mpsGraph additionWithPrimaryTensor:newCachedGraph->inputTensor_
secondaryTensor:oneTensor
name:nil];
newCachedGraph->outputTensor = [mpsGraph logarithmWithTensor:addedTensor
newCachedGraph->outputTensor_ = [mpsGraph logarithmWithTensor:addedTensor
name:nil];
}
return newCachedGraph;
});
cachedGraph = static_cast<CachedGraph *>(tmpCachedGraph);
cachedGraph = tmpCachedGraph->as<MPSUnaryCachedGraph>();
}
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor, self);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor, output);
Placeholder selfPlaceholder = Placeholder(cachedGraph->inputTensor_, self);
Placeholder outputPlaceholder = Placeholder(cachedGraph->outputTensor_, output);
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* feeds = @{
selfPlaceholder.getMPSGraphTensor() : selfPlaceholder.getMPSGraphTensorData()
};
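`unary_op` now keys the cache without scalar values, reuses `MPSUnaryCachedGraph`, and casts integral inputs to the float output dtype inside the graph; `log1p` builds `log(x + 1)` the same way. A sketch, assuming an MPS-enabled build (int64 inputs are rejected by the TORCH_CHECK_TYPE above, so int32 is used):

```python
import torch

if torch.backends.mps.is_available():
    i = torch.arange(4, dtype=torch.int32, device="mps")
    print(torch.sqrt(i))   # int input cast to float inside the graph
    x = torch.rand(4, device="mps")
    assert torch.allclose(torch.log1p(x).cpu(), torch.log1p(x.cpu()), atol=1e-6)
    print(torch.logical_not(torch.tensor([True, False], device="mps")))
```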

View File

@ -0,0 +1,259 @@
// Copyright © 2022 Apple Inc.
#include <ATen/native/mps/OperationUtils.h>
#include <ATen/native/Resize.h>
namespace at {
// these are from MPSAllocator
namespace mps {
// to check the requested non-aligned size of an MTL buffer
ssize_t get_requested_buffer_size(void* ptr);
// to retrieve the shape of a base tensor from a view tensor
IntArrayRef get_buffer_shape(void* ptr);
// to set the shape of a base tensor from a view tensor
void set_buffer_shape(void* ptr, const IntArrayRef& shape);
}
namespace native {
namespace mps {
struct ViewCachedGraph : public MPSCachedGraph
{
ViewCachedGraph(MPSGraph *graph) : MPSCachedGraph(graph) {}
MPSGraphTensor* inputTensor = nil;
MPSGraphTensor* outputTensor = nil;
MPSGraphTensor* updatesTensor = nil;
MPSGraphTensor* storageOffsetTensor = nil;
std::vector<MPSGraphTensor*> strideTensors;
};
static std::string getStridedKey(const ScalarType& dtype, const IntArrayRef& base_shape,
const IntArrayRef& new_shape, bool is_scatter)
{
return (is_scatter ? "scatter:" : "gather:") + getMPSTypeString(dtype) + "[" +
getArrayRefString(base_shape) + "]:[" + getArrayRefString(new_shape) + "]";
}
// initializes the MTLBuffers for tensor data and runs the MPSGraph for the view op
static Tensor& runViewGraph(ViewCachedGraph* cachedGraph, const at::Tensor& src, Tensor& output, bool needsScatter)
{
const id<MTLBuffer> sourceBuffer = getMTLBufferStorage(src);
const id<MTLBuffer> outputBuffer = getMTLBufferStorage(output);
const IntArrayRef& strides = needsScatter ? output.strides() : src.strides();
const IntArrayRef& sizes = needsScatter ? output.sizes() : src.sizes();
const int64_t storage_offset = needsScatter ? output.storage_offset() : src.storage_offset();
const MPSDataType inputType = [cachedGraph->inputTensor dataType];
MPSShape *inputShape = [cachedGraph->inputTensor shape];
MPSShape *outputShape = needsScatter ? inputShape : getMPSShape(src);
MPSStream* stream = getCurrentMPSStream();
@autoreleasepool {
NSMutableDictionary *feeds = [[NSMutableDictionary new] autorelease];
// in the scatter case, we use the output tensor as the input buffer and write the results back to the source buffer
feeds[cachedGraph->inputTensor] = [[[MPSGraphTensorData alloc] initWithMTLBuffer: needsScatter ? outputBuffer : sourceBuffer
shape: inputShape
dataType: inputType] autorelease];
if (needsScatter) {
feeds[cachedGraph->updatesTensor] = [[[MPSGraphTensorData alloc] initWithMTLBuffer: sourceBuffer
shape: getMPSShape(src.numel())
dataType: inputType] autorelease];
}
feeds[cachedGraph->storageOffsetTensor] = getMPSGraphTensorFromScalar(stream, Scalar(storage_offset), MPSDataTypeInt32);
for (int i = 0; i < sizes.size(); i++) {
feeds[cachedGraph->strideTensors[i]] = getMPSGraphTensorFromScalar(stream, Scalar(strides[i]), MPSDataTypeInt32);
}
MPSGraphTensorData* outputTensorData = [[[MPSGraphTensorData alloc] initWithMTLBuffer: outputBuffer
shape: outputShape
dataType: getMPSDataType(output.scalar_type())] autorelease];
NSDictionary<MPSGraphTensor*, MPSGraphTensorData*>* results = @{
cachedGraph->outputTensor : outputTensorData
};
runMPSGraph(stream, cachedGraph->graph(), feeds, results);
}
return output;
}
static MPSGraphTensor* chainViewOperation(ViewCachedGraph* cachedGraph, const IntArrayRef& size,
const IntArrayRef& stride, int64_t offset,
const IntArrayRef& base_shape, bool needsScatter)
{
MPSGraph* mpsGraph = cachedGraph->graph();
MPSGraphTensor *outputTensor = nil;
const size_t shape_size = size.size();
@autoreleasepool {
std::vector<int32_t> sizeArray(shape_size);
const int64_t int_max = std::numeric_limits<int32_t>::max();
for (int i = 0; i < shape_size; i++) {
TORCH_CHECK(size[i] <= int_max);
sizeArray[i] = static_cast<int32_t>(size[i]);
}
NSData* shapeData = [NSData dataWithBytes: sizeArray.data()
length: shape_size * sizeof(int32_t)];
MPSGraphTensor* shapeTensor = [mpsGraph constantWithData: shapeData
shape: @[[NSNumber numberWithUnsignedInteger: shape_size]]
dataType: MPSDataTypeInt32];
MPSGraphTensor* indicesTensor = nil;
// create stride Tensors for each rank of the input tensor
for (int i = 0; i < shape_size; i++) {
MPSGraphTensor* rangeTensor = [mpsGraph coordinateAlongAxis: (-i - 1)
withShapeTensor: shapeTensor
name: nil];
MPSGraphTensor* strideTensor = cachedGraph->strideTensors[shape_size - i - 1];
MPSGraphTensor* indexTensor = [mpsGraph multiplicationWithPrimaryTensor: rangeTensor
secondaryTensor: strideTensor
name: nil];
if (!indicesTensor) {
indicesTensor = indexTensor;
} else {
indicesTensor = [mpsGraph additionWithPrimaryTensor: indexTensor
secondaryTensor: indicesTensor
name: nil];
}
}
indicesTensor = [mpsGraph additionWithPrimaryTensor: indicesTensor
secondaryTensor: cachedGraph->storageOffsetTensor
name: nil];
MPSGraphTensor *reshapedInputTensor = [mpsGraph reshapeTensor: cachedGraph->inputTensor
withShape: @[@-1]
name: nil];
MPSGraphTensor *reshapedIndicesTensor = [mpsGraph reshapeTensor: indicesTensor
withShape: @[@-1]
name: nil];
if (needsScatter) {
MPSGraphTensor* scatteredTensor = [mpsGraph scatterAlongAxis: 0
withDataTensor: reshapedInputTensor
updatesTensor: cachedGraph->updatesTensor
indicesTensor: reshapedIndicesTensor
mode: MPSGraphScatterModeSet
name: nil];
outputTensor = [mpsGraph reshapeTensor: scatteredTensor
withShape: getMPSShape(base_shape)
name: nil];
} else {
// Call gather to coalesce the needed values. Result will be of same shape as flattened indices tensor
MPSGraphTensor *gatheredTensor = [mpsGraph gatherWithUpdatesTensor: reshapedInputTensor
indicesTensor: reshapedIndicesTensor
axis: 0
batchDimensions: 0
name: nil];
// Reshape the data to desired size
outputTensor = [mpsGraph reshapeTensor: gatheredTensor
withShapeTensor: shapeTensor
name: nil];
}
}
return outputTensor;
}
// There are a few cases we need to consider:
// Here nodes are the Tensors and the edges are the operations performed on the
// Tensor. As a result of the operation performed, the result can be a View
// Tensor (View T) or a Non-view tensor (NonView T). The difference is whether
// it is mapped to the same underlying storage ptr or a new MTLBuffer was allocated.
//                T = Tensor
//                 ----------
//                 | Orig T |
//                 ----------
//                /     |     \
//             View T  View T   NonView T
//             /      /    \      |
//            View T /      \     |
//            |     /        \    |
//            |    /          \   |
//            |   /            \  |
//            NonView T         NonView T
static ViewCachedGraph* createViewGraph(const Tensor& self, IntArrayRef size, IntArrayRef stride, int64_t storage_offset, bool needsScatter)
{
IntArrayRef base_shape = get_buffer_shape(self.storage().data());
if (base_shape.size() == 0) {
// IntArrayRef wouldn't own the data, so we use static storage
static const int64_t shape_1d = 1;
// self.sizes().size() could be zero
base_shape = self.sizes().size() ? self.sizes() : IntArrayRef(&shape_1d, 1);
// base_shape will be retained in MPSAllocator until buffer gets recycled
if (self.storage().data())
set_buffer_shape(self.storage().data(), base_shape);
}
MPSGraphCache* cache_ = MPSGraphCache::getInstance();
@autoreleasepool {
string key = getStridedKey(self.scalar_type(), base_shape, size, needsScatter);
ViewCachedGraph* cachedGraph = static_cast<ViewCachedGraph *>(cache_->LookUp(key));
if (!cachedGraph) {
cachedGraph = static_cast<ViewCachedGraph *>(cache_->CreateCachedGraph(key, ^ MPSCachedGraph * () {
ViewCachedGraph *newCachedGraph = nil;
@autoreleasepool {
MPSGraph* mpsGraph = make_mps_graph();
newCachedGraph = new ViewCachedGraph(mpsGraph);
// Self is the input tensor we are creating view of
newCachedGraph->inputTensor = mpsGraphRankedPlaceHolder(mpsGraph, getMPSScalarType(self.scalar_type()), getMPSShape(base_shape));
newCachedGraph->storageOffsetTensor = mpsGraphRankedPlaceHolder(mpsGraph, MPSDataTypeInt32, @[@1]);
for (int i = 0; i < size.size(); i++) {
newCachedGraph->strideTensors.push_back(mpsGraphRankedPlaceHolder(mpsGraph, MPSDataTypeInt32, @[@1]));
}
if (needsScatter) {
newCachedGraph->updatesTensor = mpsGraphUnrankedPlaceHolder(mpsGraph, getMPSDataType(self.scalar_type()));
}
newCachedGraph->outputTensor = chainViewOperation(newCachedGraph, size, stride, storage_offset, base_shape, needsScatter);
}
return newCachedGraph;
}));
}
return cachedGraph;
}
}
Tensor gatherViewTensor(const at::Tensor& src, at::Tensor& dst)
{
ViewCachedGraph* cachedGraph = nullptr;
const IntArrayRef& base_shape = get_buffer_shape(src.storage().data());
if (base_shape.size() > 0) {
string key = getStridedKey(src.scalar_type(), base_shape, src.sizes(), /*is_scatter*/ false);
cachedGraph = static_cast<ViewCachedGraph *>(MPSGraphCache::getInstance()->LookUp(key));
}
// There are cases where gatherViewTensor() is called without as_strided() having been
// called beforehand; these typically come from copy_mps variants. In such cases, when
// the base_shape isn't found, the callers fall back to making the tensor contiguous in an
// alternative code path.
if (!cachedGraph) {
return Tensor();
}
Tensor output;
if (!dst.has_storage())
output = at::native::empty_mps(src.sizes(), src.scalar_type(), c10::nullopt, kMPS);
return runViewGraph(cachedGraph, src, dst.has_storage() ? dst : output, /*needsScatter*/ false);
}
Tensor& scatterViewTensor(const at::Tensor& src, at::Tensor& output)
{
ViewCachedGraph* cachedGraph = createViewGraph(output, output.sizes(), output.strides(),
output.storage_offset(), /*needsScatter*/ true);
return runViewGraph(cachedGraph, src, output, /*needsScatter*/ true);
}
} // namespace mps
// implementation of as_strided() op
Tensor as_strided_tensorimpl_mps(const Tensor& self, IntArrayRef size, IntArrayRef stride, optional<int64_t> storage_offset_)
{
auto storage_offset = storage_offset_.value_or(self.storage_offset());
auto result = detail::make_tensor<TensorImpl>(c10::TensorImpl::VIEW, Storage(self.storage()), self.key_set(), self.dtype());
setStrided(result, size, stride, storage_offset);
// 0 sizes won't result in any change in the shape of the Tensor so we can skip it.
if (size.size() > 0)
mps::createViewGraph(self, size, stride, storage_offset, /*needsScatter*/ false);
return result;
}
} // namespace native
} // namespace at
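The gather/scatter graphs above service reads from and writes to strided MPS views without materializing an intermediate contiguous tensor. A hedged illustration of both paths:

```python
import torch

if torch.backends.mps.is_available():
    base = torch.arange(16, dtype=torch.float32, device="mps").reshape(4, 4)
    col = base[:, 1]                            # non-contiguous view via as_strided
    gathered = col.cpu()                        # gatherViewTensor path
    base[:, 1] = torch.zeros(4, device="mps")   # scatterViewTensor path
    assert torch.equal(gathered, torch.tensor([1., 5., 9., 13.]))
```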

View File

@ -654,6 +654,7 @@
structured: True
dispatch:
CPU, CUDA: argmin_out
MPS: argmin_out_mps
- func: acosh(Tensor self) -> Tensor
variants: function, method
@ -1034,6 +1035,7 @@
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_not_out
MPS: logical_not_out_mps
- func: logical_xor(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
@ -1051,6 +1053,7 @@
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_xor_out
MPS: logical_xor_out_mps
- func: logical_and(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
@ -1068,6 +1071,7 @@
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_and_out
MPS: logical_and_out_mps
- func: logical_or(Tensor self, Tensor other) -> Tensor
device_check: NoCheck # TensorIterator
@ -1085,6 +1089,7 @@
device_check: NoCheck # TensorIterator
dispatch:
CPU, CUDA: logical_or_out
MPS: logical_or_out_mps
- func: blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@ -1487,6 +1492,7 @@
dispatch:
CPU: count_nonzero_cpu
CUDA: count_nonzero_cuda
MPS: count_nonzero_mps
- func: count_nonzero(Tensor self, int? dim=None) -> Tensor
variants: function, method
@ -2176,11 +2182,13 @@
dispatch:
CPU: eye_out_cpu
CUDA: eye_out_cuda
MPS: eye_out_mps
- func: eye.m_out(int n, int m, *, Tensor(a!) out) -> Tensor(a!)
dispatch:
CPU: eye_out_cpu
CUDA: eye_out_cuda
MPS: eye_out_mps
- func: flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a)
variants: function, method
@ -2680,12 +2688,14 @@
dispatch:
CPU: layer_norm_cpu
CUDA: layer_norm_cuda
MPS: layer_norm_mps
CompositeImplicitAutograd: math_native_layer_norm
- func: native_layer_norm_backward(Tensor grad_out, Tensor input, int[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
dispatch:
CPU: layer_norm_backward_cpu
CUDA: layer_norm_backward_cuda
MPS: layer_norm_backward_mps
- func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
variants: function, method
@ -2777,6 +2787,7 @@
dispatch:
CPU, Meta: linspace_out
CUDA: linspace_cuda_out
MPS: linspace_out_mps
- func: log(Tensor self) -> Tensor
device_check: NoCheck # TensorIterator
@ -3112,6 +3123,7 @@
structured: True
dispatch:
CPU, CUDA: amax_out
MPS: amax_out_mps
# Return: (Tensor output, Tensor indices)
- func: max_pool1d_with_indices(Tensor self, int[1] kernel_size, int[1] stride=[], int[1] padding=0, int[1] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
@ -3268,6 +3280,7 @@
structured: True
dispatch:
CPU, CUDA: amin_out
MPS: amin_out_mps
# TODO: Add this function to MPS dispatch key so that we avoid declaring it in
# native_functions.yaml
@ -4835,6 +4848,7 @@
variants: function, method
dispatch:
CPU, QuantizedCPU, CUDA, QuantizedCUDA: flip
MPS: flip_mps
- func: fliplr(Tensor self) -> Tensor
variants: function, method
@ -6950,6 +6964,7 @@
variants: method
dispatch:
CPU, CUDA: exponential_
MPS: exponential_mps_
autogen: exponential.functional, exponential.out
- func: geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)
@ -8283,6 +8298,7 @@
dispatch:
CPU: cpu_equal
CUDA: cuda_equal
MPS: mps_equal
QuantizedCPU: equal_quantized_cpu
- func: pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)
@ -8373,7 +8389,7 @@
- func: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor
dispatch:
CPU, CUDA: normal
#MPS: normal_mps
MPS: normal_mps
Meta: normal_meta
- func: normal.float_Tensor_out(float mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
@ -8385,8 +8401,8 @@
- func: normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor
dispatch:
CPU, CUDA: normal
MPS: normal_mps
Meta: normal_meta
#MPS: normal_mps
- func: normal.Tensor_Tensor_out(Tensor mean, Tensor std, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
dispatch:
@ -8397,8 +8413,8 @@
- func: normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor
dispatch:
CPU, CUDA: normal
MPS: normal_mps
Meta: normal_meta
#MPS: normal_mps
- func: normal.float_float(float mean, float std, int[] size, *, Generator? generator=None, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
@ -9380,16 +9396,19 @@
python_module: nn
dispatch:
CPU, CUDA: huber_loss_out
MPS: huber_loss_out_mps
- func: huber_loss(Tensor self, Tensor target, int reduction=Mean, float delta=1.0) -> Tensor
python_module: nn
dispatch:
CPU, CUDA: huber_loss
MPS: huber_loss_mps
- func: huber_loss_backward.out(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
dispatch:
CPU, CUDA: huber_loss_backward_out
MPS: huber_loss_backward_out_mps
- func: huber_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta) -> Tensor
python_module: nn
@ -9455,6 +9474,7 @@
python_module: nn
dispatch:
CPU, CUDA: glu_out
MPS: glu_out_mps
- func: glu(Tensor self, int dim=-1) -> Tensor
structured_delegate: glu.out
@ -9466,12 +9486,14 @@
dispatch:
CPU: glu_backward_cpu_out
CUDA: glu_backward_cuda_out
MPS: glu_backward_mps_out
- func: glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor
python_module: nn
dispatch:
CPU: glu_backward_cpu
CUDA: glu_backward_cuda
MPS: glu_backward_mps
- func: glu_jvp(Tensor glu, Tensor x, Tensor dx, int dim) -> Tensor
python_module: nn
@ -9671,6 +9693,7 @@
python_module: nn
dispatch:
CPU, CUDA: softplus_out
MPS: softplus_out_mps
- func: softplus(Tensor self, Scalar beta=1, Scalar threshold=20) -> Tensor
structured_delegate: softplus.out
@ -9683,6 +9706,7 @@
python_module: nn
dispatch:
CPU, CUDA: softplus_backward_out
MPS: softplus_backward_out_mps
- func: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor
structured_delegate: softplus_backward.grad_input
@ -9781,6 +9805,7 @@
dispatch:
CPU: adaptive_max_pool2d_out_cpu
CUDA: adaptive_max_pool2d_out_cuda
MPS: adaptive_max_pool2d_out_mps
# Return: (Tensor output, Tensor indices)
- func: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)
@ -9793,6 +9818,7 @@
dispatch:
CPU: adaptive_max_pool2d_backward_out_cpu
CUDA: adaptive_max_pool2d_backward_out_cuda
MPS: adaptive_max_pool2d_backward_out_mps
- func: adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor
python_module: nn
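
Taken together, these dispatch entries route additional operators to new MPS kernels. A quick sanity-check sketch for a few of them (assumes the MPS backend is available):

import torch
import torch.nn.functional as F

# Sketch only: calls a few of the ops whose MPS kernels are registered above.
if torch.backends.mps.is_available():
    x = torch.randn(8, device="mps")
    t = torch.randn(8, device="mps")
    print(F.huber_loss(x, t))             # dispatches to huber_loss_mps
    print(torch.eye(3, device="mps"))     # dispatches to eye_out_mps
    print(torch.amax(x, dim=0))           # dispatches to amax_out_mps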


@ -316,7 +316,17 @@ Tensor nested_from_padded_generic(
padded.size(2),
padded.size(1) * padded.size(3)});
}
const auto target_size = NestedTensor_get_max_size_from_size_tensor(sizes);
auto target_size = NestedTensor_get_max_size_from_size_tensor(sizes);
// There may be extra padding on padded beyond the max size in the nested tensor.
// Make the mask size match.
const size_t dim = padded_transformed.dim();
TORCH_CHECK(dim - 1 == target_size.size(), "dim: ", dim, " target_size: ", target_size.size());
for (size_t ii = 0; ii < dim - 1; ++ii) {
const auto padded_size_i = padded_transformed.sizes()[ii + 1];
if (target_size[ii] < padded_size_i) {
target_size[ii] = padded_size_i;
}
}
IntArrayRef target_size_arr(target_size);
std::vector<at::Tensor> masks;
std::vector<at::Tensor> all_sizes = sizes.unbind();
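
The loop above widens each per-dimension max size whenever the dense input carries extra padding beyond it; a plain-Python mirror of that logic, with an assumed example shape:

# Assumed example values; padded_shape[0] is the batch dimension.
padded_shape = (2, 5, 8)   # batch, padded sequence length, feature
target_size = [4, 8]       # max sizes derived from the nested size tensor

assert len(padded_shape) - 1 == len(target_size)
for i, max_sz in enumerate(target_size):
    padded_sz = padded_shape[i + 1]     # compare against the non-batch dims
    if max_sz < padded_sz:
        target_size[i] = padded_sz      # grow the mask dim to cover the padding

print(target_size)  # [5, 8]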


@ -129,7 +129,7 @@ void PackedConvWeightCudnn<kSpatialDim>::apply_impl_helper(const at::Tensor& qua
auto padding_vec = padding_.vec();
auto stride_vec = stride_.vec();
auto dilation_vec = dilation_.vec();
setConvolutionParams(&key.params, input, maybe_padded_weight_, padding_vec, stride_vec, dilation_vec, groups_, deterministic, allow_tf32);
setConvolutionParams(&key.params, input, maybe_padded_weight_, padding_vec, stride_vec, dilation_vec, groups_, deterministic, allow_tf32, input.suggest_memory_format());
// operator datatype needs to be int32 for int8 convolution, but we can
// set the datatype for output tensor to int32 or fp32


@ -981,8 +981,17 @@ elseif(USE_CUDA)
target_link_libraries(torch_cuda_linalg PRIVATE
torch_cpu
torch_cuda
${CUDA_cusolver_LIBRARY}
)
if($ENV{ATEN_STATIC_CUDA})
target_link_libraries(torch_cuda_linalg PRIVATE
${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusolver_static.a
${CUDA_TOOLKIT_ROOT_DIR}/lib64/liblapack_static.a # needed for libcusolver_static
)
else()
target_link_libraries(torch_cuda_linalg PRIVATE
${CUDA_cusolver_LIBRARY}
)
endif()
# NS: TODO, is this really necessary?
if(USE_MAGMA AND CAFFE2_STATIC_LINK_CUDA)
target_link_libraries(torch_cuda_linalg PRIVATE


@ -289,10 +289,7 @@ add_library(caffe2::cublas INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
set_property(
TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a")
set_property(
TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a")
${CUDA_CUBLAS_LIBRARIES})
# Add explicit dependency to cudart_static to fix
# libcublasLt_static.a.o): undefined reference to symbol 'cudaStreamWaitEvent'
# error adding symbols: DSO missing from command line


@ -16,7 +16,7 @@ pushd "$(dirname "$0")/../../.."
cp torch/_utils_internal.py tools/shared
python -m torchgen.gen
python -m torchgen.gen --source-path aten/src/ATen
python tools/setup_helpers/generate_code.py \
--native-functions-path aten/src/ATen/native/native_functions.yaml \


@ -111,7 +111,8 @@ Loading Batched and Non-Batched Data
:class:`~torch.utils.data.DataLoader` supports automatically collating
individual fetched data samples into batches via arguments
:attr:`batch_size`, :attr:`drop_last`, and :attr:`batch_sampler`.
:attr:`batch_size`, :attr:`drop_last`, :attr:`batch_sampler`, and
:attr:`collate_fn` (which has a default function).
Automatic batching (default)
@ -209,7 +210,8 @@ arrays in PyTorch tensors.
**When automatic batching is enabled**, :attr:`collate_fn` is called with a list
of data samples at each time. It is expected to collate the input samples into
a batch for yielding from the data loader iterator. The rest of this section
describes behavior of the default :attr:`collate_fn` in this case.
describes the behavior of the default :attr:`collate_fn`
(:func:`~torch.utils.data.default_collate`).
For instance, if each data sample consists of a 3-channel image and an integral
class label, i.e., each element of the dataset returns a tuple
@ -232,6 +234,10 @@ Users may use customized :attr:`collate_fn` to achieve custom batching, e.g.,
collating along a dimension other than the first, padding sequences of
various lengths, or adding support for custom data types.
If you run into a situation where the outputs of :class:`~torch.utils.data.DataLoader`
have dimensions or types that differ from your expectation, you may
want to check your :attr:`collate_fn`.
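
As an illustration, a small sketch contrasting the default stacking behavior with a custom :attr:`collate_fn` (the toy dataset below is assumed purely for illustration):

import torch
from torch.utils.data import DataLoader

# Toy dataset of (image, label) pairs, assumed purely for illustration.
dataset = [(torch.randn(3, 8, 8), i % 2) for i in range(10)]

# default_collate stacks samples into batched tensors.
images, labels = next(iter(DataLoader(dataset, batch_size=4)))
print(images.shape, labels.shape)  # torch.Size([4, 3, 8, 8]) torch.Size([4])

# A custom collate_fn that leaves each batch as a plain list of samples.
batch = next(iter(DataLoader(dataset, batch_size=4, collate_fn=lambda b: b)))
print(type(batch), len(batch))     # <class 'list'> 4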
Single- and Multi-process Data Loading
--------------------------------------


@ -56,6 +56,7 @@ Features described in this documentation are classified by release status:
tensor_view
torch.amp <amp>
torch.autograd <autograd>
torch.library <library>
cuda
torch.backends <backends>
torch.distributed <distributed>


@ -874,6 +874,11 @@ now supported.
b = 2
return x, b
Fusion Backends
~~~~~~~~~~~~~~~
There are a couple of fusion backends available to optimize TorchScript execution. The default fuser on CPUs is NNC, which can perform fusions for both CPUs and GPUs. The default fuser on GPUs is NVFuser, which supports a wider range of operators and has demonstrated improved throughput in the kernels it generates. See the `NVFuser documentation <https://github.com/pytorch/pytorch/blob/release/1.12/torch/csrc/jit/codegen/cuda/README.md>`_ for more details on usage and debugging.
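
A short sketch of selecting a fuser for a scripted function (fuser names as exposed by ``torch.jit.fuser`` in this release; a CUDA device is assumed for NVFuser):

import torch

@torch.jit.script
def f(x):
    return torch.sin(x) + torch.cos(x)  # an elementwise chain the fusers can fuse

# "fuser1" selects NNC and "fuser2" selects NVFuser within the context.
with torch.jit.fuser("fuser2"):
    f(torch.randn(1024, device="cuda"))  # assumes a CUDA device is present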
References
~~~~~~~~~~
.. toctree::

docs/source/library.rst (new file, 42 lines)

@ -0,0 +1,42 @@
torch.library
===================================
The Python operator registration API provides the means to extend PyTorch's core library
of operators with user-defined operators. Currently, this can be done in two ways:
#. Creating new libraries
* Lets you register **new operators** and kernels for various backends and functionalities by specifying the appropriate dispatch keys. For example:
* Consider registering a new operator ``add`` in your newly created namespace ``foo``. You can access this operator through the ``torch.ops`` API by calling ``torch.ops.foo.add``, and specific registered overloads by calling ``torch.ops.foo.add.{overload_name}``.
* If you registered a new kernel for the ``CUDA`` dispatch key for this operator, then your custom-defined function will be called for CUDA tensor inputs.
* This can be done by creating Library class objects of ``"DEF"`` kind.
#. Extending existing C++ libraries (e.g., aten)
* Lets you register kernels for **existing operators** corresponding to various backends and functionalities by specifying the appropriate dispatch keys.
* This can come in handy for filling in spotty operator support for a feature implemented through a dispatch key. For example,
* You can add operator support for Meta Tensors (by registering a function to the ``Meta`` dispatch key).
* This can be done by creating Library class objects of ``"IMPL"`` kind.
A tutorial that walks you through some examples on how to use this API is available on `Google Colab <https://colab.research.google.com/drive/1RRhSfk7So3Cn02itzLWE9K4Fam-8U011?usp=sharing>`_.
.. warning::
The dispatcher is a complicated PyTorch concept, and a sound understanding of it is crucial
for doing anything advanced with this API. `This blog post <http://blog.ezyang.com/2020/09/lets-talk-about-the-pytorch-dispatcher/>`_
is a good starting point for learning about the dispatcher.
.. currentmodule:: torch.library
.. autoclass:: torch.library.Library
:members:
We have also added some function decorators to make it convenient to register functions for operators:
* :func:`torch.library.impl`
* :func:`torch.library.define`
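
A minimal sketch of both kinds of registration (the ``foo`` namespace and the schema below are illustrative, not part of PyTorch):

import torch
from torch.library import Library

# "DEF" kind: create a new namespace and register a brand-new operator in it.
foo_lib = Library("foo", "DEF")
foo_lib.define("add(Tensor a, Tensor b) -> Tensor")
foo_lib.impl("add", lambda a, b: a + b, "CPU")
print(torch.ops.foo.add(torch.ones(2), torch.ones(2)))  # tensor([2., 2.])

# "IMPL" kind: attach kernels to *existing* operators for a dispatch key,
# e.g. Meta; one would then call meta_lib.impl(...) for specific aten ops.
meta_lib = Library("aten", "IMPL", "Meta")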


@ -120,6 +120,7 @@ Dropout functions
dropout
alpha_dropout
feature_alpha_dropout
dropout1d
dropout2d
dropout3d
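
For reference, a quick sketch of the newly documented ``dropout1d`` (input shape assumed):

import torch
import torch.nn.functional as F

x = torch.randn(4, 8, 16)                 # assumed (N, C, L) input
y = F.dropout1d(x, p=0.5, training=True)  # zeroes entire channels at random
print(y.shape)                            # torch.Size([4, 8, 16])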

Some files were not shown because too many files have changed in this diff.