Compare commits


65 Commits

6c8c5ad5ea [RelEng] Define BUILD_BUNDLE_PTXAS (#119750) (#119988)
Co-authored-by: Nikita Shulga <nshulga@meta.com>
Fixes https://github.com/pytorch/pytorch/issues/119054
resolved: https://github.com/pytorch/pytorch/pull/119750
2024-02-15 13:19:00 -05:00
f00f0ab0e4 fix compiling DTensor.from_local in trace_rule lookup (#119659) (#119941)
resolved: https://github.com/pytorch/pytorch/pull/119659
2024-02-15 12:46:55 -05:00
077791bb6b Revert "Update state_dict.py to propagate cpu offload (#117453)" (#119995) 2024-02-15 12:45:22 -05:00
3eaaeeb45a Update state_dict.py to propagate cpu offload (#117453) (#119916)
resolved: https://github.com/pytorch/pytorch/pull/117453
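
A hedged sketch of what the propagated option enables (API names from `torch.distributed.checkpoint.state_dict`; the model and optimizer here are illustrative stand-ins, not code from the PR):
```python
import torch
from torch.distributed.checkpoint.state_dict import (
    StateDictOptions,
    get_state_dict,
)

# Illustrative model/optimizer; in practice these would be the
# FSDP/DDP-wrapped model and its optimizer.
model = torch.nn.Linear(4, 4)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# With the fix, cpu_offload propagates, so the returned state dicts
# live on CPU even when parameters are sharded on an accelerator.
model_sd, optim_sd = get_state_dict(
    model,
    optimizers=optimizer,
    options=StateDictOptions(cpu_offload=True),
)
```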
2024-02-15 10:14:52 -05:00
0aa3fd32fe HSDP + TP integration bug fixes (#119819)
Co-authored-by: Andrew Gu <andgu@fb.com>
resolved: https://github.com/pytorch/pytorch/pull/112435
resolved: https://github.com/pytorch/pytorch/pull/118620
Fixed `device_mesh` and auto wrap (#119064)
Fixes https://github.com/pytorch/pytorch/issues/118906.
resolved: https://github.com/pytorch/pytorch/pull/119064
resolved: https://github.com/pytorch/pytorch/pull/118638
Fixes https://github.com/pytorch/pytorch/issues/118639.
resolved: https://github.com/pytorch/pytorch/pull/119481
2024-02-14 15:46:31 -05:00
eef51a6bee [Inductor] Skip triton templates for mixedmm on SM70- (#118591) (#119894)
As it results in numerical errors, see https://github.com/pytorch/pytorch/issues/117144

Fixes https://github.com/pytorch/pytorch/issues/117144

Pull Request resolved: https://github.com/pytorch/pytorch/pull/118591
Approved by: https://github.com/jansel

Co-authored-by: Nikita Shulga <nshulga@meta.com>
2024-02-14 12:23:24 -08:00
940358f12f [dtensor] fix dtensor _to_copy op for mixed precision (#116426) (#119687)
Co-authored-by: Wanchao Liang <wanchaol@users.noreply.github.com>
fix dtensor _to_copy op for mixed precision (#116426)
resolved: https://github.com/pytorch/pytorch/pull/116426
2024-02-14 14:01:54 -05:00
24e4751650 [state_dict] Calls wait() for the DTensor to_local() result (#118197) (#119692)
Co-authored-by: Chien-Chin Huang <chienchin@fb.com>
Co-authored-by: Yue Dong <yoyoyod@meta.com>
resolved: https://github.com/pytorch/pytorch/pull/118197
Follow-up fix to address numerical-correctness concerns identified in PR #118197: we should only wait on `AsyncCollectiveTensor`.
resolved: https://github.com/pytorch/pytorch/pull/119716
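
A hedged sketch of the narrowed wait described above (the import path is assumed from the private `_functional_collectives` module; not the PR's literal code):
```python
import torch
from torch.distributed._functional_collectives import AsyncCollectiveTensor

def _maybe_wait(tensor: torch.Tensor) -> torch.Tensor:
    # Only an AsyncCollectiveTensor carries a pending collective;
    # waiting on anything else is unnecessary.
    if isinstance(tensor, AsyncCollectiveTensor):
        return tensor.wait()
    return tensor
```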
2024-02-14 13:59:06 -05:00
dcaeed36eb [DCP][state_dict] Fix the issue that get_state_dict/set_state_dict ig… (#119807)
Fixes https://github.com/pytorch/pytorch/issues/119535.
resolved: https://github.com/pytorch/pytorch/pull/119573
2024-02-14 12:14:01 -05:00
4f882a5f32 Properly preserve SymInt input invariant when splitting graphs (#117406) (#118067)
Co-authored-by: Edward Z. Yang <ezyang@meta.com>
Fixes https://github.com/pytorch/pytorch/issues/111636
Fixes https://github.com/pytorch/pytorch/issues/108877
Fixes https://github.com/pytorch/pytorch/issues/116956
resolved: https://github.com/pytorch/pytorch/pull/117406
2024-02-14 11:28:54 -05:00
e80c8c2e98 Correctly formatting the example in get_state_dict (#119532) (#119804)
Co-authored-by: jmarin <diyemti@gmail.com>
Fixes #118837
resolved: https://github.com/pytorch/pytorch/pull/119532
2024-02-14 10:15:46 -05:00
445b0f9b63 [DCP][state_dict] DCP state_dict cannot correctly find FQN when the l… (#119691)
Co-authored-by: Chien-Chin Huang <chienchin@fb.com>
resolved: https://github.com/pytorch/pytorch/pull/115592
2024-02-14 10:07:35 -05:00
95ea4e6648 [FSDP][2D] Fix DTensor Extension Bugs (#119690)
Co-authored-by: Wanchao Liang <wanchaol@users.noreply.github.com>
resolved: https://github.com/pytorch/pytorch/pull/116122
resolved: https://github.com/pytorch/pytorch/pull/117020
fixes https://github.com/pytorch/pytorch/issues/117126
resolved: https://github.com/pytorch/pytorch/pull/117336
2024-02-14 10:04:56 -05:00
bbfcfb0302 [FSDP] enable autograd in forward prefetching (#116792) (#119688)
Co-authored-by: Wei (Will) Feng <134637289+weifengpy@users.noreply.github.com>
resolved: https://github.com/pytorch/pytorch/pull/116792
2024-02-14 10:03:11 -05:00
2304d6bfb1 Fix ColwiseParallel typo (#116151) (#119821)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/116151
Approved by: https://github.com/wanchaol

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
2024-02-13 16:34:45 -08:00
7b436b0d05 Update oneDNN build option for older systems (#118057) (#119773)
Co-authored-by: yanbing-j <yanbing.jiang@intel.com>
Fixes [#116623](https://github.com/pytorch/pytorch/issues/116623).
resolved: https://github.com/pytorch/pytorch/pull/118057
2024-02-13 15:07:55 -05:00
4ae866593d [EZ] Set maximum supported version of Python as 3.12 (#119743) (#119770)
Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
resolved: https://github.com/pytorch/pytorch/pull/119743
2024-02-13 15:06:38 -05:00
bac09b8555 Fix TCP Store Windows (#118860) (#119769)
Co-authored-by: mantaionut <ionut@janeasystems.com>
Fixes #118737
resolved: https://github.com/pytorch/pytorch/pull/118860
2024-02-13 15:05:56 -05:00
b9814bc525 Updated docs for deprecated torch.set_default_tensor_type (#115041) (#119316)
Fixes #113646.
resolved: https://github.com/pytorch/pytorch/pull/115041
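
For reference, a minimal sketch of the replacements the updated docs steer users toward (assuming the standard guidance of controlling dtype and device separately):
```python
import torch

# Deprecated:
#   torch.set_default_tensor_type(torch.cuda.FloatTensor)
# Preferred: set dtype and device independently.
torch.set_default_dtype(torch.float32)
torch.set_default_device("cuda")  # assumes a CUDA-capable machine

x = torch.empty(2, 2)  # float32, created on the default (CUDA) device
```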
2024-02-12 11:57:30 -05:00
6a3a3df103 Clarified sampling process of torch.randn for complex dtypes. (#118315) (#119315)
Fixes #118269.
resolved: https://github.com/pytorch/pytorch/pull/118315
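
The clarified behavior in a quick, hedged check: for complex dtypes, the real and imaginary parts are each drawn from N(0, 1/2), so the complex samples have unit variance overall.
```python
import torch

z = torch.randn(1_000_000, dtype=torch.complex64)
print(z.real.var())  # ~0.5
print(z.imag.var())  # ~0.5
print(z.var())       # ~1.0: unit variance for the complex samples
```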
2024-02-12 11:55:06 -05:00
b126b0d724 Missing docs for CircularPad2d (#119313)
Fixes #118429
resolved: https://github.com/pytorch/pytorch/pull/118465
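
A minimal usage sketch of the layer the new docs cover:
```python
import torch
import torch.nn as nn

pad = nn.CircularPad2d(1)  # pad all four sides by 1, wrapping around
x = torch.arange(9.0).reshape(1, 1, 3, 3)
y = pad(x)
print(y.shape)  # torch.Size([1, 1, 5, 5]); borders are circular copies
```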
2024-02-12 11:54:31 -05:00
d65d0e598e Replaced CHECK with TORCH_CHECK in order to not abort, but throw a RuntimeError instead (#119301)

Fixes #117499.

Cherry-pick of  https://github.com/pytorch/pytorch/pull/117653 into release/2.2 
Approved by: https://github.com/antoniojkim, https://github.com/JackCaoG, https://github.com/alanwaketan

Co-authored-by: Tobias Ringwald <github@ringwald.email>
2024-02-12 07:32:37 -08:00
a412db0995 [CI] Explicitly specify read-all permissions on the token (#117290) (#119568)
Co-authored-by: Nikita Shulga <nshulga@meta.com>
Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
resolved: https://github.com/pytorch/pytorch/pull/117290
resolved: https://github.com/pytorch/pytorch/pull/117371
2024-02-09 14:30:18 -05:00
e9956badeb Migrate rocm test to using oidc (#117160) (#119565)
Co-authored-by: Huy Do <huydhn@gmail.com>
resolved: https://github.com/pytorch/pytorch/pull/117160
resolved: https://github.com/pytorch/pytorch/pull/117422
2024-02-09 14:29:13 -05:00
574f46da53 [oidc] Migrate Triton wheel upload to oidc (#117648) (#119564)
Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
resolved: https://github.com/pytorch/pytorch/pull/117648
Fix triton wheels build (take 2) (#117706)
resolved: https://github.com/pytorch/pytorch/pull/117706
2024-02-09 14:28:32 -05:00
55d10abc0f Switch nightly binaries to oidc. Remove aws keys (#117416) (#119560)
resolved: https://github.com/pytorch/pytorch/pull/117416
2024-02-09 14:27:54 -05:00
0cd0631716 Fix typo on torch.frombuffer() documentation (#119388) 2024-02-09 13:13:09 -05:00
44ab785f75 Fix typo on Contribution Guide (#119428) (#119505)
Fixes #119427
resolved: https://github.com/pytorch/pytorch/pull/119428
2024-02-09 13:11:35 -05:00
8ac9b20d4b Run docker release build on final tag (#117131) (#117182)
To be successful, the docker release workflow needs to run on the final tag, after the releases to conda and PyPI are complete.

Please refer to: https://github.com/pytorch/pytorch/blob/main/Dockerfile#L76

Pull Request resolved: https://github.com/pytorch/pytorch/pull/117131
Approved by: https://github.com/huydhn, https://github.com/seemethere, https://github.com/malfet
2024-01-10 14:17:29 -08:00
2490352430 Fix cuInit test on Windows (#117095)
resolved: https://github.com/pytorch/pytorch/pull/117055
2024-01-10 13:21:27 -05:00
3a44bb713f [CI] Test that cuInit is not called during import (#117043)
By making a driver API call in subprocess and expecting it to return `CUDA_ERROR_NOT_INITIALIZED`
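
A hedged sketch of that check (the exact test lives in the PR; error code 3 is `CUDA_ERROR_NOT_INITIALIZED` in the driver API):
```python
import subprocess
import sys

# Probe in a subprocess so the parent interpreter's CUDA state cannot leak in.
probe = """
import ctypes
import torch  # importing torch must NOT call cuInit

libcuda = ctypes.CDLL("libcuda.so.1")
count = ctypes.c_int(0)
rc = libcuda.cuDeviceGetCount(ctypes.byref(count))
assert rc == 3, f"driver already initialized during import (rc={rc})"
"""
subprocess.check_call([sys.executable, "-c", probe])
```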

Test Plan: run it on nightlies before https://github.com/pytorch/pytorch/pull/116201 got reverted and observe the failure

This is very important for lots of distributed launchers

Fixes https://github.com/pytorch/pytorch/issues/116276

Cherry-pick of  https://github.com/pytorch/pytorch/pull/117010 into release/2.2

Co-authored-by: Nikita Shulga <nshulga@meta.com>
2024-01-09 11:30:03 -08:00
1c8ba3847d [CI] Use jemalloc for CUDA builds (#116900) (#116988)
According to @ptrblck it'll likely mitigate a non-deterministic NVCC bug
See https://github.com/pytorch/pytorch/issues/116289 for more detail

Test plan: ssh into one of the cuda builds and make sure that `LD_PRELOAD` is set for the top-level make command

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116900
Approved by: https://github.com/atalman

Co-authored-by: Nikita Shulga <nshulga@meta.com>
2024-01-08 19:53:13 -08:00
96d2ddbafe Store user model to simplify ONNXProgram.{adapt_torch_*,__call__} APIs (#115281) (#115583)
Currently (after https://github.com/pytorch/pytorch/pull/114407), the user must pass the original ``model`` to APIs such as ``ONNXProgram.__call__``, ``ONNXProgram.adapt_torch_inputs_to_onnx``, and ``ONNXProgram.adapt_torch_outputs_to_onnx``.

This was needed because when the model is fakefied, a non-fakefied version of the model is needed so that the initializers, buffers, and constants can be extracted from a real model (and used as input to the ONNX model).
That approach brings an unnecessary usability burden to the user when the model is not fakefied, because the model that was already passed to ``torch.onnx.dynamo_export`` could be used to extract ``state_dict``.

This PR adds an ``ONNXProgram._model_torch`` attribute to store the user model and demotes the ``model`` argument of the aforementioned APIs from required to optional.

As a result, for the fakefied model scenario, the user still needs to pass the model, but for non-fakefied models, the persisted model is implicitly used to extract the model state_dict, making the APIs easier to use.
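
A hedged sketch of the resulting usage (the model and inputs are illustrative; running ``ONNXProgram.__call__`` additionally assumes onnxruntime is installed):
```python
import torch

class M(torch.nn.Module):
    def forward(self, x):
        return x + 1

model, x = M(), torch.randn(2)
onnx_program = torch.onnx.dynamo_export(model, x)

# Non-fakefied case: the persisted model supplies the state_dict, so no
# extra model argument is needed when running the exported program.
outputs = onnx_program(x)
```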
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115281
Approved by: https://github.com/BowenBao
ghstack dependencies: #114407
2024-01-08 10:16:13 -08:00
738b4a560a Update ONNX's IO Adapter to support FakeTensor with ExportedProgram (#114407) (#115578)
Currently, the ONNX exporter using torch.nn.Module as input can support
FakeTensor, because the ONNX model stores all initializers.

When using torch.export.ExportedProgram as input, the initializers are
lifted as inputs. In order to execute the ONNX model, we need to pass a
reference to the non-fake model to the
ONNXProgram.adapt_torch_inputs_to_onnx API, so that initializers can be
fetched from the model and fed to the ONNX model as input.

P.S.: https://github.com/pytorch/pytorch/issues/115461 will track the API revision for the cases where an additional `model_with_state_dict` argument is required to produce complete ONNX files exported with fake support. This is also tracked by the umbrella fake tensor issue https://github.com/pytorch/pytorch/issues/105464 FYI @BowenBao
Pull Request resolved: https://github.com/pytorch/pytorch/pull/114407
Approved by: https://github.com/BowenBao
2024-01-05 13:57:50 -08:00
4cf10bf4dc [Cherry-pick] [Quant] [PT2] Enable batchnorm in _move_exported_model_to_eval (#115715) 2024-01-04 15:36:16 -05:00
7e97e4b4b6 [AARCH64] Fall back to GEMM if mkldnn_matmul fails (#115936) (#116666)
- Add call to `at::globalContext().userEnabledMkldnn()` to `apply_mkldnn_matmul_heur`
- Surround calls to `mkldnn_matmul` with `try {} catch {}`
- Print a warning and fall back to BLAS (by calling `at::globalContext().setUserEnabledMkldnn()`) if `mkldnn_matmul()` fails

Test plan: On Linux arm run:
```shell
$ sudo chmod 400 /sys; python -c "import torch;m=torch.nn.Linear(1, 32);print(torch.__version__);print(m(torch.rand(32, 1)))"
Error in cpuinfo: failed to parse the list of possible processors in /sys/devices/system/cpu/possible
Error in cpuinfo: failed to parse the list of present processors in /sys/devices/system/cpu/present
Error in cpuinfo: failed to parse both lists of possible and present processors
2.3.0.dev20231215
bad err=11 in Xbyak::Error
bad err=11 in Xbyak::Error
/home/ubuntu/miniconda3/envs/py311/lib/python3.11/site-packages/torch/nn/modules/linear.py:116: UserWarning: mkldnn_matmul failed, switching to BLAS gemm:internal error (Triggered internally at /pytorch/aten/src/ATen/native/LinearAlgebra.cpp:1509.)
  return F.linear(input, self.weight, self.bias)
tensor([[-0.5183,  0.2279, -0.4035,  ..., -0.3446,  0.0938, -0.2113],
        [-0.5111,  0.2362, -0.3821,  ..., -0.3536,  0.1011, -0.2159],
        [-0.6387,  0.0894, -0.7619,  ..., -0.1939, -0.0282, -0.1344],
        ...,
        [-0.6352,  0.0934, -0.7516,  ..., -0.1983, -0.0247, -0.1366],
        [-0.4790,  0.2733, -0.2862,  ..., -0.3939,  0.1338, -0.2365],
        [-0.5702,  0.1682, -0.5580,  ..., -0.2796,  0.0412, -0.1782]],
       grad_fn=<AddmmBackward0>)
```
Fixes https://github.com/pytorch/pytorch/issues/114750

Pull Request resolved: https://github.com/pytorch/pytorch/pull/115936
Approved by: https://github.com/lezcano

Co-authored-by: Nikita Shulga <nshulga@meta.com>
2024-01-02 20:38:28 -08:00
1a3e3c7cff [CUDA] baddmm should fall back to addmm for batch=1 (#114992) (#116518)
I.e., it seems reasonable to always call `at::cuda::gemm` rather than `at::cuda::bgemm` when num_batches == 1.
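
A hedged illustration of the equivalence the fallback relies on: for a single batch, `bmm` reduces to `mm` on the squeezed matrices.
```python
import torch

a = torch.randn(1, 64, 32)
b = torch.randn(1, 32, 16)
ref = torch.bmm(a, b)                       # batched path, num_batches == 1
via_mm = torch.mm(a[0], b[0]).unsqueeze(0)  # single-matrix path
torch.testing.assert_close(ref, via_mm)
```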
After the change, benchmark results for torch built with CUDA 12, using the [following perf script](https://gist.github.com/malfet/6a17156d7f5663b8b12054a1beff3fe1) on an A100, are as follows:
|      Shape     |  bmm_time |  mm_time  | slow down (%) |
| -------------- | --------- | --------- | ------------- |
|    1x1x4096    |   14.18   |   14.31   |     -0.89     |
|    1x1x8192    |   14.37   |   14.37   |     -0.05     |
|   1x1x16384    |   14.03   |   14.12   |     -0.68     |
|   1x1x32768    |   14.19   |   14.24   |     -0.35     |
|   1x1x65536    |   14.85   |   14.52   |     2.30      |
|   1x1x131072   |   14.03   |   14.07   |     -0.33     |
|  128x128x128   |   11.34   |   11.06   |     2.56      |
|  256x256x256   |   14.85   |   14.40   |     3.15      |
|  512x512x512   |   27.22   |   27.22   |     -0.01     |
| 1024x1024x1024 |  129.66   |  129.50   |     0.12      |
| 2048x2048x2048 |  972.18   |  973.24   |     -0.11     |
|  129x127x129   |   11.21   |   11.25   |     -0.39     |
|  257x255x257   |   14.50   |   14.43   |     0.44      |
|  513x511x513   |   29.01   |   29.01   |     0.01      |
| 1025x1023x1025 |  137.65   |  137.64   |     0.01      |
| 2049x2047x2049 |  982.58   |  982.65   |     -0.01     |
|  4097x3x4097   |   86.65   |   86.64   |     0.01      |
|  8193x3x8193   |  384.02   |  383.96   |     0.02      |
| 16385x3x16385  |  1106.73  |  1107.32  |     -0.05     |
| 32769x3x32769  |  4739.49  |  4739.48  |     0.00      |
| 65537x3x65537  | 17377.78  | 17378.74  |     -0.01     |
|  4097x5x4097   |   87.09   |   87.12   |     -0.03     |
|  8193x5x8193   |  301.38   |  301.36   |     0.01      |
| 16385x5x16385  |  1107.38  |  1108.04  |     -0.06     |
| 32769x5x32769  |  4743.73  |  4744.07  |     -0.01     |
| 65537x5x65537  | 17392.32  | 17395.42  |     -0.02     |
|  4097x7x4097   |   87.17   |   87.19   |     -0.02     |
|  8193x7x8193   |  301.94   |  302.00   |     -0.02     |
| 16385x7x16385  |  1107.17  |  1106.79  |     0.03      |
| 32769x7x32769  |  4747.15  |  4747.13  |     0.00      |
| 65537x7x65537  | 17403.85  | 17405.02  |     -0.01     |

Fixes perf problem reported in https://github.com/pytorch/pytorch/issues/114911
Pull Request resolved: https://github.com/pytorch/pytorch/pull/114992
Approved by: https://github.com/Skylion007, https://github.com/eqy

Co-authored-by: Nikita Shulga <nshulga@meta.com>
2024-01-02 16:54:15 -05:00
ab7505f78c Fix broken PyYAML 6.0 on MacOS x86 (#115956) (#116551)
Maybe we should just get rid of the x86 jobs, but that's for another day.  This one should fix the broken build in trunk, i.e. https://github.com/pytorch/pytorch/actions/runs/7227220153/job/19694420117.

I guess the failure looks flaky depending on the version of the default python3 on the GitHub x86 runner.

The issue from PyYAML https://github.com/yaml/pyyaml/issues/601
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115956
Approved by: https://github.com/malfet

(cherry picked from commit 94d28161faccd6e2a2e99bdb22cfadef8a24077e)

Co-authored-by: Huy Do <huydhn@gmail.com>
2023-12-29 21:19:50 -08:00
953c9c0c29 [CI] Fix docker builds (#116549) (#116552)
By pinning lxml to 4.9.4 as 5.0.0 is missing Python-3.9 binaries, see https://pypi.org/project/lxml/5.0.0/#files

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116549
Approved by: https://github.com/houseroad, https://github.com/aakhundov

(cherry picked from commit bd7d26bb964ef08354771d19fa7d70d539f97c81)
2023-12-29 21:19:16 -08:00
0288d567fb [MPS] aten::erfinv bug fix: add storage offset buffers to handle slicing (#116542)
A bug fix of a recently merged PR per comment: https://github.com/pytorch/pytorch/pull/101507#discussion_r1271393706

The following test would fail without this bug fix:

```
import torch
def test_erfinv():
    for device in ['cpu', 'mps']:
        x = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5], device=device)
        y = x[2:].erfinv()

        x2 = torch.tensor([0.3, 0.4, 0.5], device=device)
        y2 = x2.erfinv()

        print(y)
        print(y2)

        torch.testing.assert_close(y, y2)
        print(f"{device} passes.")

test_erfinv()
```

Cherry-pick of  https://github.com/pytorch/pytorch/pull/105801 into release/2.2

Co-authored-by: Peter Pham <peterpham86@gmail.com>
2023-12-29 15:34:30 -08:00
ce29e8f9b1 [RelEng] Missing signal for release branches (#116516) (#116541)
Run slow/periodic and inductor workflows on push to release branches

Right now there is no signal from those jobs on release branches at all.
This will run periodic jobs on every commit to release branches, which is fine, as these branches are short-lived and see much lower traffic than regular ones

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116516
Approved by: https://github.com/clee2000

Co-authored-by: Nikita Shulga <nshulga@meta.com>
2023-12-29 14:53:47 -05:00
444e132b74 Removing HTA documentation (#116513) (#116540)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116513
Approved by: https://github.com/aaronenyeshi, https://github.com/malfet, https://github.com/atalman

Co-authored-by: Anupam Bhatnagar <anupamb@meta.com>
2023-12-29 14:53:13 -05:00
596bbaf6fc Fix missing dependency in torch.utils.tensorboard (#115598) (#116517)
Fixes #114591

The `version` package was removed in pull request #114108 but is still used in `torch.utils.tensorboard`, causing import errors. The fix removes the import and uses a simpler check.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115598
Approved by: https://github.com/malfet

Co-authored-by: Sacha <sachahu@hotmail.fr>
2023-12-28 17:28:59 -05:00
be254276d2 Back out "[Kineto] Initialize libkineto profilers during torch init process during pybind set-up (#112623)" (#116201) (#116332)
Summary:
This diff needs to be backed out because TorchBench llama_v2_7b_16h has a cublas init error.
https://github.com/pytorch/benchmark/actions/runs/7266269668/job/19797677485?pr=2095

Test Plan: CI

Differential Revision: D52339142

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116201
Approved by: https://github.com/xuzhao9

(cherry picked from commit a357a0f31519f96cff9839c1672a112539ba98ff)

Co-authored-by: Aaron Shi <aaronshi@meta.com>
2023-12-24 10:39:34 -05:00
9fd518dfdc Fix allowed dtypes for mem_eff attention (#116026) (#116272)
# Summary

Fix a bug in detecting mem-efficient attention capability for CUDA devices older than sm80:
https://github.com/pytorch-labs/gpt-fast/issues/49
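
A hedged smoke test of the scenario (assumes a CUDA device; `sdp_kernel` forces the memory-efficient backend so the capability check is exercised):
```python
import torch
import torch.nn.functional as F

q = torch.randn(1, 8, 128, 64, device="cuda", dtype=torch.float16)
# Force the memory-efficient backend; with the fix, pre-sm80 devices get a
# correct dtype capability check instead of a spurious failure.
with torch.backends.cuda.sdp_kernel(
    enable_flash=False, enable_math=False, enable_mem_efficient=True
):
    out = F.scaled_dot_product_attention(q, q, q)
```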

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116026
Approved by: https://github.com/janeyx99
2023-12-22 23:04:31 -08:00
bc244ee2cd Fix bug in mem_eff kernel with attention mask and MQA (#116301)
# Summary

Found using the repros mentioned in this issue: #112577

After many rounds with compute-sanitizer and eventual printf debugging, I feel pretty confident that this was the underlying issue

Cherry-pick of  https://github.com/pytorch/pytorch/pull/116234 into release/2.2 branch
2023-12-22 07:40:43 -08:00
df3cab83e1 [ROCm] Disabling Kernel Asserts for ROCm by default - fix and clean up and refactoring (#114660) (#116207)
Related to #103973  #110532 #108404 #94891

**Context:**
As commented in 6ae0554d11/cmake/Dependencies.cmake (L1198)
Kernel asserts are enabled by default for CUDA and disabled for ROCm.
However this was somewhat broken, and kernel asserts were still enabled for ROCm.

Disabling kernel asserts is also needed for users who do not have PCIe atomics support. Community users have verified that disabling kernel asserts on the PyTorch/ROCm platform fixed their PyTorch workflows, e.g. a torch.sum script and stable-diffusion (see the related issues).

**Changes:**

This pull request serves the following purposes:
* Refactor and clean up the logic, making it simpler for ROCm to enable and disable kernel asserts
* Fix the bug that Kernel Asserts for ROCm was not disabled by default.

Specifically,
- Renamed `TORCH_DISABLE_GPU_ASSERTS` to `C10_USE_ROCM_KERNEL_ASSERT` for the following reasons:
(1) This variable only applies to ROCm.
(2) The new name aligns better with the #define CUDA_KERNEL_ASSERT function.
(3) With USE_ in front of the name, we can easily control it with an environment variable to turn this feature on and off during build (e.g. `USE_ROCM_KERNEL_ASSERT=1 python setup.py develop` will enable kernel asserts for the ROCm build).
- Get rid of `ROCM_FORCE_ENABLE_GPU_ASSERTS` to simplify the logic and make it easier to understand and maintain
- Added `#cmakedefine` to carry over the CMake variable to C++

**Tests:**
(1) Build in default mode and verify that USE_ROCM_KERNEL_ASSERT is OFF (0) and kernel asserts are disabled:

```
python setup.py develop
```
Verify CMakeCache.txt has correct value.
```
/xxxx/pytorch/build$ grep USE_ROCM_KERNEL_ASSERT CMakeCache.txt
USE_ROCM_KERNEL_ASSERT:BOOL=0
```
Tested the following code in a ROCm build and a CUDA build, expecting different return codes.

```
subprocess.call([sys.executable, '-c', "import torch;torch._assert_async(torch.tensor(0,device='cuda'));torch.cuda.synchronize()"])
```
This piece of code is adapted from the unit test below, to get around the limitation that this unit test is currently skipped for ROCm. (We will look into enabling it in the future.)

```
python test/test_cuda_expandable_segments.py -k test_fixed_cuda_assert_async
```

Ran the following script, expecting r == 0 since CUDA_KERNEL_ASSERT is defined as nothing:
```
>>> import sys
>>> import subprocess
>>> r=subprocess.call([sys.executable, '-c', "import torch;torch._assert_async(torch.tensor(0,device='cuda'));torch.cuda.synchronize()"])
>>> r
0
```

(2) Enable the kernel assert by building with USE_ROCM_KERNEL_ASSERT=1, or USE_ROCM_KERNEL_ASSERT=ON
```
USE_ROCM_KERNEL_ASSERT=1 python setup.py develop
```

Verify `USE_ROCM_KERNEL_ASSERT` is `1`
```
/xxxx/pytorch/build$ grep USE_ROCM_KERNEL_ASSERT CMakeCache.txt
USE_ROCM_KERNEL_ASSERT:BOOL=1
```

Run the assert test, expecting a nonzero return code.

```
>>> import sys
>>> import subprocess
>>> r=subprocess.call([sys.executable, '-c', "import torch;torch._assert_async(torch.tensor(0,device='cuda'));torch.cuda.synchronize()"])
>>>/xxxx/pytorch/aten/src/ATen/native/hip/TensorCompare.hip:108: _assert_async_cuda_kernel: Device-side assertion `input[0] != 0' failed.
:0:rocdevice.cpp            :2690: 2435301199202 us: [pid:206019 tid:0x7f6cf0a77700] Callback: Queue 0x7f64e8400000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016

>>> r
-6
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/114660
Approved by: https://github.com/jeffdaily, https://github.com/malfet, https://github.com/jithunnair-amd

(cherry picked from commit 66a76516bfc341b2b55bb2056d2faa9c2de46d69)

Co-authored-by: hongxyan <hongxyan@amd.com>
2023-12-21 09:27:14 -05:00
32e1876876 [CherryPick][DeviceMesh] Fix DeviceMesh docs #116053 and #116074 (#116115)
* [DeviceMesh] Rename _device_mesh.py to device_mesh.py to prepare for beta (#115193)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115193

Rename _device_mesh.py to device_mesh.py, update all callsites, add documentation.
We created stubs for public classes and methods in torch.distributed.device_mesh so that it can be imported whether or not distributed is available.

Original diff reverted: D51629761
Original PR reverted: https://github.com/pytorch/pytorch/pull/115099
Prior to landing, all CI signals passed. Shipit added the "ci/trunk" label to the PR but DID NOT wait for it and went ahead with committing. More context can be found in the reverted PR above.

Test Plan: CI.

Differential Revision: D51861018

fbshipit-source-id: dc7b26cea7340d55498730123e82a42cef46ff55

* fix doc

* Update device_mesh.py docs imports
#116074
2023-12-19 19:46:43 -08:00
f9e2b3d8a7 Docker Release builds Include both cuda versions (#115949) (#116065)
* Use matrix generate script for docker release workflows (#115949)

Enable builds for both supported CUDA versions for the docker release, rather than building only one version.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115949
Approved by: https://github.com/huydhn

* [releng] Docker official release: make sure the cuda version is part of the image name (#116070)

Follow up on https://github.com/pytorch/pytorch/pull/115949

Change docker build image name:
``pytorch:2.1.2-devel`` -> ``2.1.2-cuda12.1-cudnn8-devel`` and ``2.1.2-cuda11.8-cudnn8-devel``

Ref: https://github.com/orgs/pytorch/packages/container/package/pytorch-nightly

Naming will be same as in https://hub.docker.com/r/pytorch/pytorch/tags
Pull Request resolved: https://github.com/pytorch/pytorch/pull/116070
Approved by: https://github.com/huydhn, https://github.com/seemethere

* [releng] Docker release: refactor the "Push nightly tags" step and move the cuda and cudnn versions to the docker tag rather than the name (#116097)

Follow-up after: https://github.com/pytorch/pytorch/pull/116070

This PR does two things.

1. Refactors the "Push nightly tags" step; extracting CUDA_VERSION is no longer needed. The new tag format is: ``${PYTORCH_VERSION}-cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION)-runtime``
2. Moves cuda$(CUDA_VERSION_SHORT)-cudnn$(CUDNN_VERSION) from the docker name to the tag

Pull Request resolved: https://github.com/pytorch/pytorch/pull/116097
Approved by: https://github.com/jeanschmidt
2023-12-19 17:01:58 -05:00
2ad9cab9b2 [tp] further fix the docs (#115974) (#116119)
Some typos resulted in the note section not rendering properly; this couldn't be seen from the last PR directly, as that PR only showed documentation for the first commit :(

Also make the parallelize_module doc example more concrete

Pull Request resolved: https://github.com/pytorch/pytorch/pull/115974
Approved by: https://github.com/wz337
2023-12-19 15:24:40 -05:00
5a4f136340 [Release/2.2] Enable THP for buffer sizes >=2MB (#115990)
The 2MB THP (transparent huge pages) pages provide better allocation latencies compared to the standard 4KB pages. This change has shown substantial improvement for batch-mode use cases where the tensor sizes are larger than 100MB.

Only enabled if the `THP_MEM_ALLOC_ENABLE` environment variable is set.
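
A hedged illustration of the underlying mechanism (not the allocator's actual code): on Linux, a sufficiently large anonymous mapping can be backed by 2MB transparent huge pages via madvise.
```python
import mmap

SIZE = 256 * 1024 * 1024  # well above the 2MB THP threshold

buf = mmap.mmap(-1, SIZE)        # anonymous mapping
buf.madvise(mmap.MADV_HUGEPAGE)  # request 2MB THP backing (Linux-only)
```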

Relanding https://github.com/pytorch/pytorch/pull/93888 with functionality disabled for Android

Cherry-pick of  https://github.com/pytorch/pytorch/pull/107697 into release/2.2 branch
(cherry-picked from commit 88207b10cab33b08a15a9009630b5c1e7549ea2b)
2023-12-19 09:51:12 -08:00
e8ebe2cfca [export] Update schema version (#115712) (#115952)
Since the PyTorch 2.1 release, we've made some BC-breaking changes to the serialized schema. We should update it in time for the 2.2 release. Some of the changes include:

* https://github.com/pytorch/pytorch/pull/114371 - custom class objects / pybinded objects are no longer saved directly to the `ExportedProgram` structure. Instead, the name is serialized inside of the program, and the actual bytes are stored in a separate location from the exported program, allowing them to be saved elsewhere.
* https://github.com/pytorch/pytorch/pull/111204 - `GraphSignature` structure changed and `call_spec` is removed from the `GraphModule` schema
* https://github.com/pytorch/pytorch/pull/111407 - `loss_outout` -> `loss_output`
* https://github.com/pytorch/pytorch/pull/113075 - `example_inputs` removed from the `ExportedProgram` structure (this originally did not store anything), `dialect` added to the `ExportedProgram` structure.
* https://github.com/pytorch/pytorch/pull/113689 - tensor constants are now lifted as inputs to the graph, and their locations are stored in the `GraphSignature`
* https://github.com/pytorch/pytorch/pull/114172 - removed `equality_constraints` and added a `SymExprHint` for all symbolic expressions.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/115712
Approved by: https://github.com/gmagogsfm
2023-12-18 10:53:42 -08:00
da4bf36936 [tp] improve documentation (#115880) (#115939)
Improve the TP documentation in terms of format and descriptions

Pull Request resolved: https://github.com/pytorch/pytorch/pull/115880
Approved by: https://github.com/XilunWu
2023-12-18 11:41:58 -05:00
6ca1983e77 Set _dynamo.config.capture_func_transforms=False (#115267) (#115929)
Because not all tests in the Dynamo shard actually run in CI, this
implementation has started to bitrot. Since our plan is to trace
into the functorch implementations instead of constructing a HOP
(which is what capture_func_transforms=True does), let's turn this
config off by default.
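
For anyone relying on the old behavior, the flag can still be flipped back explicitly (a hedged sketch of the toggle named above):
```python
import torch._dynamo

# Re-enable HOP capture of functorch transforms
# (off by default after this change).
torch._dynamo.config.capture_func_transforms = True
```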

Test Plan:
- Tests

Pull Request resolved: https://github.com/pytorch/pytorch/pull/115267
Approved by: https://github.com/voznesenskym, https://github.com/guilhermeleobas
2023-12-15 14:23:11 -05:00
7f55ee7fe8 [Release/2.2] Extend expected fx output types for int, float, bool (#115669)
Fixes exporting ops, such as `aten::_scaled_dot_product_flash_attention`, that return int-, float-, or bool-typed outputs.

Cherry-pick of https://github.com/pytorch/pytorch/pull/115431 into release/2.2 branch
Approved by: https://github.com/titaiwangms, https://github.com/thiagocrepaldi
2023-12-14 14:24:34 -08:00
8be26111f9 [Release/2.2] [export] Do not copy state_dict in run_decomp (#115753)
Fixes https://github.com/pytorch/pytorch/issues/114628

Cherry-pick of  https://github.com/pytorch/pytorch/pull/115269 into release/2.2 branch
Approved by: https://github.com/thiagocrepaldi, https://github.com/ydwu4

Co-authored-by: angelayi <yiangela7@gmail.com>
2023-12-14 14:22:37 -08:00
1b70285fcd Fix SDPA for SAM (#115636) (#115667)
Addresses the regression for Segment Anything Fast in https://github.com/pytorch-labs/segment-anything-fast/issues/99
Cherry-pick of  https://github.com/pytorch/pytorch/pull/115636 into release/2.2
Approved by: https://github.com/soulitzer, https://github.com/ani300
2023-12-14 14:20:13 -08:00
1518578b54 [Release/2.2]Rename _device_mesh.py to device_mesh.py (#115600)
Cherry pick of https://github.com/pytorch/pytorch/pull/115193 into release/2.2 branch

Rename `_device_mesh.py` to `device_mesh.py`, update all callsites, add documentation.
We created stubs for public classes and methods in torch.distributed.device_mesh so that it can be imported whether or not distributed is available.

Original diff reverted: D51629761
Original PR reverted: https://github.com/pytorch/pytorch/pull/115099
Prior to landing, all CI signals passed. Shipit added the "ci/trunk" label to the PR but DID NOT wait for it and went ahead with committing. More context can be found in the reverted PR above.

Test Plan: CI.

Differential Revision: D51861018

fbshipit-source-id: dc7b26cea7340d55498730123e82a42cef46ff55
2023-12-12 12:05:40 -08:00
e57f089704 [Release/2.2] Fix NULL dereference in binary CPU ops (#115470)
Targeted fix for https://github.com/pytorch/pytorch/issues/113037

A more fundamental fix, where those functions are not even called for
empty tensors, is coming later

Cherry-pick of release https://github.com/pytorch/pytorch/pull/115183 into release/2.2 branch

(cherry picked from commit b56b002842dd2bed8ed3ac4aa83c934b19adb931)
2023-12-08 19:33:13 -08:00
44d11579db Checkout release version if we are using python release (#115379)
* Checkout release version if we are using python release

* lint

* lint
2023-12-07 18:14:33 -05:00
0863b4c354 Add reset_storage method to FunctionalTensorWrapper (#115235) (#115320)
In certain edge cases when using lazy tensors, the base tensor stored in the `FunctionalStorageImpl` and the `value_` tensor stored in the `FunctionalTensorWrapper` diverge. For instance, take this simple example
```python
class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(4, 2, bias=False)

    def forward(self, x):
        return x @ self.fc1.weight.transpose(0, 1)

with torch.device("lazy"):
    model = Model()

    x = torch.ones(4)
    out = model(x)
```
The call to `transpose` on the lazily initialized weight `fc1.weight` applies a view op on the functional tensor, which only gets propagated to the functional tensor wrapper and not to the base tensor in the storage, causing the two to diverge.

To fix this behaviour, we need to reset the functional tensor's storage. To facilitate this, we add a `_unsafe_reset_storage` method to `FunctionalTensorWrapper` which clears away the old storage and view metas.

Porting over PR from https://github.com/pytorch/pytorch/pull/115235
Cherry-picked: 73c0035160e7b2c5772417bb7206b316bdf34044
2023-12-07 09:47:54 -08:00
12bcfddce5 [releng] Increase triton version for release 2.2 (#115352) 2023-12-07 12:43:05 -05:00
99718eda57 [Release 2.2] Release only changes 3 (#115348) 2023-12-07 10:29:28 -05:00
24397727a8 [Release 2.2] Release only changes 2 (#115318)
* [Release only changes] Follow up

* fix
2023-12-06 21:49:00 -05:00
54aca571d6 [Release 2.2] Release only changes (#115292)
* [Release 2.2] Release only changes

* Release only part 2

* Pin unstable jobs

* fix

* Fix lint
2023-12-06 18:30:25 -05:00
1768 changed files with 51623 additions and 80816 deletions


@ -19,7 +19,6 @@ See `build.sh` for valid build environments (it's the giant switch).
* `ubuntu` -- Dockerfile for Ubuntu image for CPU build and test jobs
* `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
* `ubuntu-rocm` -- Dockerfile for Ubuntu image with ROCm support
* `ubuntu-xpu` -- Dockerfile for Ubuntu image with XPU support
## Usage


@ -71,8 +71,6 @@ if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
DOCKERFILE="${OS}-cuda/Dockerfile"
elif [[ "$image" == *rocm* ]]; then
DOCKERFILE="${OS}-rocm/Dockerfile"
elif [[ "$image" == *xpu* ]]; then
DOCKERFILE="${OS}-xpu/Dockerfile"
elif [[ "$image" == *cuda*linter* ]]; then
# Use a separate Dockerfile for linter to keep a small image size
DOCKERFILE="linter-cuda/Dockerfile"
@ -220,16 +218,6 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-jammy-xpu-2024.0-py3)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
BASEKIT_VERSION=2024.0.0-49522
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
;;
pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks)
ANACONDA_PYTHON_VERSION=3.8
GCC_VERSION=11
@ -386,7 +374,6 @@ docker build \
--build-arg "DOCS=${DOCS}" \
--build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
--build-arg "EXECUTORCH=${EXECUTORCH}" \
--build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
-f $(dirname ${DOCKERFILE})/Dockerfile \
-t "$tmp_tag" \
"$@" \


@ -1 +1 @@
663882fe7dc518c04adf3d2ee5ccb7d99f41ade4
b2f5dfe80704404298467347b8ee3ac229efed47


@ -1 +1 @@
e28a256d71f3cf2bcc7b69d6bda73a9b855e385e
bcad9dabe15021c53b6a88296e9d7a210044f108


@ -75,7 +75,6 @@ install_ubuntu() {
libtool \
vim \
unzip \
gpg-agent \
gdb
# Should resolve issues related to various apt package repository cert issues


@ -2,8 +2,8 @@
if [[ ${CUDNN_VERSION} == 8 ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn
pushd tmp_cudnn
mkdir tmp_cudnn && cd tmp_cudnn
CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
@ -11,14 +11,17 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
else
print "Unsupported CUDA version ${CUDA_VERSION}"
exit 1
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
fi
tar xf ${CUDNN_NAME}.tar.xz
cp -a ${CUDNN_NAME}/include/* /usr/include/
cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUDNN_NAME}/include/* /usr/include/x86_64-linux-gnu/
cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
popd
cp -a ${CUDNN_NAME}/lib/* /usr/lib/x86_64-linux-gnu/
cd ..
rm -rf tmp_cudnn
ldconfig
fi


@ -1,21 +0,0 @@
#!/bin/bash
set -ex
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && cd tmp_cusparselt
if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.5.2.1-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
fi
tar xf ${CUSPARSELT_NAME}.tar.xz
cp -a ${CUSPARSELT_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUSPARSELT_NAME}/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cusparselt
ldconfig


@ -1,115 +0,0 @@
#!/bin/bash
set -xe
# Intel® software for general purpose GPU capabilities.
# Refer to https://dgpu-docs.intel.com/releases/stable_647_21_20230714.html
# Intel® oneAPI Base Toolkit (version 2024.0.0) has been updated to include functional and security updates.
# Refer to https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html
# Users should update to the latest version as it becomes available
function install_ubuntu() {
apt-get update -y
apt-get install -y gpg-agent wget
# Set up the repository. To do this, download the key to the system keyring
wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
| gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
# Add the signed entry to APT sources and configure the APT client to use the Intel repository
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/production/2328 unified" \
| tee /etc/apt/sources.list.d/intel-gpu-jammy.list
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
| tee /etc/apt/sources.list.d/oneAPI.list
# Update the packages list and repository index
apt-get update
# The xpu-smi packages
apt-get install -y flex bison xpu-smi
# Compute and Media Runtimes
apt-get install -y \
intel-opencl-icd intel-level-zero-gpu level-zero \
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
# Development Packages
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
# Install Intel® oneAPI Base Toolkit
if [ -n "$BASEKIT_VERSION" ]; then
apt-get install intel-basekit=$BASEKIT_VERSION -y
else
apt-get install intel-basekit -y
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}
function install_centos() {
dnf install -y 'dnf-command(config-manager)'
dnf config-manager --add-repo \
https://repositories.intel.com/gpu/rhel/8.6/production/2328/unified/intel-gpu-8.6.repo
# To add the EPEL repository needed for DKMS
dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
# https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
# Create the YUM repository file in the /temp directory as a normal user
tee > /tmp/oneAPI.repo << EOF
[oneAPI]
name=Intel® oneAPI repository
baseurl=https://yum.repos.intel.com/oneapi
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
EOF
# Move the newly created oneAPI.repo file to the YUM configuration directory /etc/yum.repos.d
mv /tmp/oneAPI.repo /etc/yum.repos.d
# The xpu-smi packages
dnf install -y flex bison xpu-smi
# Compute and Media Runtimes
dnf install -y \
intel-opencl intel-media intel-mediasdk libmfxgen1 libvpl2\
level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
mesa-libxatracker libvpl-tools intel-metrics-discovery \
intel-metrics-library intel-igc-core intel-igc-cm \
libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc hwinfo clinfo
# Development packages
dnf install -y --refresh \
intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
level-zero-devel
# Install Intel® oneAPI Base Toolkit
dnf install intel-basekit -y
# Cleanup
dnf clean all
rm -rf /var/cache/yum
rm -rf /var/lib/yum/yumdb
rm -rf /var/lib/yum/history
}
# The installation depends on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
centos)
install_centos
;;
*)
echo "Unable to determine OS..."
exit 1
;;
esac


@ -299,7 +299,7 @@ pywavelets==1.4.1
#Pinned versions: 1.4.1
#test that import:
lxml==5.0.0.
lxml==4.9.4
#Description: This is a requirement of unittest-xml-reporting
# have to pin to 4.9.4 because 5.0.0 release on Dec 29th missing
# Python-3.9 binaries


@ -142,12 +142,6 @@ COPY ./common/install_cudnn.sh install_cudnn.sh
RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi
RUN rm install_cudnn.sh
# Install CUSPARSELT
ARG CUDA_VERSION
COPY ./common/install_cusparselt.sh install_cusparselt.sh
RUN bash install_cusparselt.sh
RUN rm install_cusparselt.sh
# Delete /usr/local/cuda-11.X/cuda-11.X symlinks
RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi


@ -1,118 +0,0 @@
ARG UBUNTU_VERSION
FROM ubuntu:${UBUNTU_VERSION}
ARG UBUNTU_VERSION
ENV DEBIAN_FRONTEND noninteractive
ARG CLANG_VERSION
# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh
# Install clang
ARG LLVMDEV
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh
# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ARG DOCS
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
ENV DOCS=$DOCS
COPY requirements-ci.txt requirements-docs.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh
# Install lcov for C++ code coverage
COPY ./common/install_lcov.sh install_lcov.sh
RUN bash ./install_lcov.sh && rm install_lcov.sh
COPY ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
ENV OPENSSL_DIR /opt/openssl
RUN rm install_openssl.sh
ARG INDUCTOR_BENCHMARKS
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
ARG TRITON
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
# TODO: will add triton xpu commit
COPY ci_commit_pins/triton.txt triton.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt
# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}
# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}
# Install XPU Dependencies
ARG BASEKIT_VERSION
COPY ./common/install_xpu.sh install_xpu.sh
RUN bash ./install_xpu.sh && rm install_xpu.sh
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh
# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
USER jenkins
CMD ["bash"]


@ -153,12 +153,6 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
python tools/amd_build/build_amd.py
fi
if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
# shellcheck disable=SC1091
source /opt/intel/oneapi/compiler/latest/env/vars.sh
export USE_XPU=1
fi
# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
if [ -z "$MAX_JOBS" ]; then


@ -173,7 +173,7 @@ function install_torchrec_and_fbgemm() {
function clone_pytorch_xla() {
if [[ ! -d ./xla ]]; then
git clone --recursive --quiet https://github.com/pytorch/xla.git
git clone --recursive -b r2.2 https://github.com/pytorch/xla.git
pushd xla
# pin the xla hash so that we don't get broken by changes to xla
git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"


@ -18,10 +18,6 @@ BUILD_DIR="build"
BUILD_RENAMED_DIR="build_renamed"
BUILD_BIN_DIR="$BUILD_DIR"/bin
#Set Default values for these variables in case they are not set
SHARD_NUMBER="${SHARD_NUMBER:=1}"
NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}"
export VALGRIND=ON
# export TORCH_INDUCTOR_INSTALL_GXX=ON
if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
@ -128,8 +124,6 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm* ]]; then
# mainly used so that we're not spending extra cycles testing cpu
# devices on expensive gpu machines
export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda"
elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
fi
if [[ "$TEST_CONFIG" == *crossref* ]]; then
@ -142,15 +136,6 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
rocminfo | grep -E 'Name:.*\sgfx|Marketing'
fi
if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
# Source Intel oneAPI envrioment script to enable xpu runtime related libraries
# refer to https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/2024-0/use-the-setvars-and-oneapi-vars-scripts-with-linux.html
# shellcheck disable=SC1091
source /opt/intel/oneapi/compiler/latest/env/vars.sh
# Check XPU status before testing
xpu-smi discovery
fi
if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
# JIT C++ extensions require ninja.
pip_install --user "ninja==1.10.2"
@ -274,7 +259,6 @@ test_dynamo_shard() {
--exclude-jit-executor \
--exclude-distributed-tests \
--exclude \
test_ao_sparsity \
test_autograd \
test_jit \
test_proxy_tensor \
@ -324,10 +308,8 @@ test_inductor() {
# docker build uses bdist_wheel which does not work with test_aot_inductor
# TODO: need a faster way to build
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
fi
BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
}
# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
@ -407,8 +389,8 @@ test_perf_for_dashboard() {
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
TORCHINDUCTOR_CPP_WRAPPER=1 python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs --cpp-wrapper "$@" \
--output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_cuda_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *freezing_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
@ -509,13 +491,6 @@ test_inductor_torchbench_smoketest_perf() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
# smoke test the cpp_wrapper mode
TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy --bfloat16 \
--inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_smoketest.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_smoketest.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
@ -525,11 +500,7 @@ test_inductor_torchbench_smoketest_perf() {
python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
--export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
# The threshold value needs to be actively maintained to make this check useful
# The perf number of nanogpt seems not very stable, e.g.
# https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
# and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
# we switch to use some other model.
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 5.2
# Check memory compression ratio for a few models
for test in hf_Albert timm_vision_transformer; do
@ -689,20 +660,6 @@ test_libtorch_api() {
fi
}
test_xpu_bin(){
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
for xpu_case in "${BUILD_BIN_DIR}"/*{xpu,sycl}*
do
if [[ "$xpu_case" != *"*"* ]]; then
case_name=$(basename "$xpu_case")
echo "Testing ${case_name} ..."
"$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
fi
done
}
test_aot_compilation() {
echo "Testing Ahead of Time compilation"
ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
@ -1112,7 +1069,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
# https://github.com/opencv/opencv-python/issues/885
pip_install opencv-python==4.8.0.74
if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
else
checkout_install_torchbench
@ -1128,21 +1085,19 @@ elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 ]]; then
test_inductor
test_inductor_distributed
elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
test_without_numpy
install_torchvision
test_dynamo_shard 1
test_aten
elif [[ "${TEST_CONFIG}" == *dynamo* && $SHARD_NUMBER -gt 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
test_dynamo_shard "${SHARD_NUMBER}"
test_dynamo_shard 2
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
test_without_numpy
install_torchvision
test_python_shard 1
test_aten
test_libtorch 1
if [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
test_xpu_bin
fi
elif [[ "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
install_torchvision
test_python_shard 2
@ -1167,11 +1122,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
install_torchvision
test_python
test_aten
elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
install_torchvision
test_python
test_aten
test_xpu_bin
else
install_torchvision
install_monkeytype


@ -0,0 +1,198 @@
"""
This module models the tree of configuration variants
for "smoketest" builds.
Each subclass of ConfigNode represents a layer of the configuration hierarchy.
These tree nodes encapsulate the logic for whether a branch of the hierarchy
should be "pruned".
"""
from collections import OrderedDict
import cimodel.data.dimensions as dimensions
from cimodel.lib.conf_tree import ConfigNode
LINKING_DIMENSIONS = [
"shared",
"static",
]
DEPS_INCLUSION_DIMENSIONS = [
"with-deps",
"without-deps",
]
def get_processor_arch_name(gpu_version):
return (
"cpu"
if not gpu_version
else (
"cu" + gpu_version.strip("cuda")
if gpu_version.startswith("cuda")
else gpu_version
)
)
CONFIG_TREE_DATA = OrderedDict()
# GCC config variants:
#
# All the nightlies (except libtorch with new gcc ABI) are built with devtoolset7,
# which can only build with old gcc ABI. It is better than devtoolset3
# because it understands avx512, which is needed for good fbgemm performance.
#
# Libtorch with new gcc ABI is built with gcc 5.4 on Ubuntu 16.04.
LINUX_GCC_CONFIG_VARIANTS = OrderedDict(
manywheel=["devtoolset7"],
conda=["devtoolset7"],
libtorch=[
"devtoolset7",
"gcc5.4_cxx11-abi",
],
)
WINDOWS_LIBTORCH_CONFIG_VARIANTS = [
"debug",
"release",
]
class TopLevelNode(ConfigNode):
def __init__(self, node_name, config_tree_data, smoke):
super().__init__(None, node_name)
self.config_tree_data = config_tree_data
self.props["smoke"] = smoke
def get_children(self):
return [
OSConfigNode(self, x, c, p) for (x, (c, p)) in self.config_tree_data.items()
]
class OSConfigNode(ConfigNode):
def __init__(self, parent, os_name, gpu_versions, py_tree):
super().__init__(parent, os_name)
self.py_tree = py_tree
self.props["os_name"] = os_name
self.props["gpu_versions"] = gpu_versions
def get_children(self):
return [PackageFormatConfigNode(self, k, v) for k, v in self.py_tree.items()]
class PackageFormatConfigNode(ConfigNode):
def __init__(self, parent, package_format, python_versions):
super().__init__(parent, package_format)
self.props["python_versions"] = python_versions
self.props["package_format"] = package_format
def get_children(self):
if self.find_prop("os_name") == "linux":
return [
LinuxGccConfigNode(self, v)
for v in LINUX_GCC_CONFIG_VARIANTS[self.find_prop("package_format")]
]
elif (
self.find_prop("os_name") == "windows"
and self.find_prop("package_format") == "libtorch"
):
return [
WindowsLibtorchConfigNode(self, v)
for v in WINDOWS_LIBTORCH_CONFIG_VARIANTS
]
else:
return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
class LinuxGccConfigNode(ConfigNode):
def __init__(self, parent, gcc_config_variant):
super().__init__(parent, "GCC_CONFIG_VARIANT=" + str(gcc_config_variant))
self.props["gcc_config_variant"] = gcc_config_variant
def get_children(self):
gpu_versions = self.find_prop("gpu_versions")
# XXX devtoolset7 on CUDA 9.0 is temporarily disabled
# see https://github.com/pytorch/pytorch/issues/20066
if self.find_prop("gcc_config_variant") == "devtoolset7":
gpu_versions = filter(lambda x: x != "cuda_90", gpu_versions)
# XXX disabling conda rocm build since docker images are not there
if self.find_prop("package_format") == "conda":
gpu_versions = filter(
lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions
)
# XXX libtorch rocm build is temporarily disabled
if self.find_prop("package_format") == "libtorch":
gpu_versions = filter(
lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions
)
return [ArchConfigNode(self, v) for v in gpu_versions]
class WindowsLibtorchConfigNode(ConfigNode):
def __init__(self, parent, libtorch_config_variant):
super().__init__(
parent, "LIBTORCH_CONFIG_VARIANT=" + str(libtorch_config_variant)
)
self.props["libtorch_config_variant"] = libtorch_config_variant
def get_children(self):
return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
class ArchConfigNode(ConfigNode):
def __init__(self, parent, gpu):
super().__init__(parent, get_processor_arch_name(gpu))
self.props["gpu"] = gpu
def get_children(self):
return [PyVersionConfigNode(self, v) for v in self.find_prop("python_versions")]
class PyVersionConfigNode(ConfigNode):
def __init__(self, parent, pyver):
super().__init__(parent, pyver)
self.props["pyver"] = pyver
def get_children(self):
package_format = self.find_prop("package_format")
os_name = self.find_prop("os_name")
has_libtorch_variants = package_format == "libtorch" and os_name == "linux"
linking_variants = LINKING_DIMENSIONS if has_libtorch_variants else []
return [LinkingVariantConfigNode(self, v) for v in linking_variants]
class LinkingVariantConfigNode(ConfigNode):
def __init__(self, parent, linking_variant):
super().__init__(parent, linking_variant)
def get_children(self):
return [
DependencyInclusionConfigNode(self, v) for v in DEPS_INCLUSION_DIMENSIONS
]
class DependencyInclusionConfigNode(ConfigNode):
def __init__(self, parent, deps_variant):
super().__init__(parent, deps_variant)
self.props["libtorch_variant"] = "-".join(
[self.parent.get_label(), self.get_label()]
)
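Crossing the two dimension lists at the top of this file yields the four libtorch variant labels that DependencyInclusionConfigNode assembles from its parent's label and its own; a minimal sketch:

# Sketch: the four libtorch_variant strings produced by crossing
# LINKING_DIMENSIONS with DEPS_INCLUSION_DIMENSIONS.
from itertools import product

expected_variants = [
    f"{linking}-{deps}"
    for linking, deps in product(LINKING_DIMENSIONS, DEPS_INCLUSION_DIMENSIONS)
]
# -> ["shared-with-deps", "shared-without-deps",
#     "static-with-deps", "static-without-deps"]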

View File

@ -0,0 +1,275 @@
from collections import OrderedDict
import cimodel.data.binary_build_data as binary_build_data
import cimodel.data.simple.util.branch_filters as branch_filters
import cimodel.lib.conf_tree as conf_tree
import cimodel.lib.miniutils as miniutils
class Conf:
def __init__(
self,
os,
gpu_version,
pydistro,
parms,
smoke,
libtorch_variant,
gcc_config_variant,
libtorch_config_variant,
):
self.os = os
self.gpu_version = gpu_version
self.pydistro = pydistro
self.parms = parms
self.smoke = smoke
self.libtorch_variant = libtorch_variant
self.gcc_config_variant = gcc_config_variant
self.libtorch_config_variant = libtorch_config_variant
def gen_build_env_parms(self):
elems = (
[self.pydistro]
+ self.parms
+ [binary_build_data.get_processor_arch_name(self.gpu_version)]
)
if self.gcc_config_variant is not None:
elems.append(str(self.gcc_config_variant))
if self.libtorch_config_variant is not None:
elems.append(str(self.libtorch_config_variant))
return elems
def gen_docker_image(self):
if self.gcc_config_variant == "gcc5.4_cxx11-abi":
if self.gpu_version is None:
return miniutils.quote("pytorch/libtorch-cxx11-builder:cpu")
else:
return miniutils.quote(
f"pytorch/libtorch-cxx11-builder:{self.gpu_version}"
)
if self.pydistro == "conda":
if self.gpu_version is None:
return miniutils.quote("pytorch/conda-builder:cpu")
else:
return miniutils.quote(f"pytorch/conda-builder:{self.gpu_version}")
docker_word_substitution = {
"manywheel": "manylinux",
"libtorch": "manylinux",
}
docker_distro_prefix = miniutils.override(
self.pydistro, docker_word_substitution
)
# The cpu nightlies are built on the pytorch/manylinux-cuda102 docker image
# TODO cuda images should consolidate into tag-base images similar to rocm
alt_docker_suffix = (
"cuda102"
if not self.gpu_version
else (
"rocm:" + self.gpu_version.strip("rocm")
if self.gpu_version.startswith("rocm")
else self.gpu_version
)
)
docker_distro_suffix = (
alt_docker_suffix
if self.pydistro != "conda"
else ("cuda" if alt_docker_suffix.startswith("cuda") else "rocm")
)
return miniutils.quote(
"pytorch/" + docker_distro_prefix + "-" + docker_distro_suffix
)
def get_name_prefix(self):
return "smoke" if self.smoke else "binary"
def gen_build_name(self, build_or_test, nightly):
parts = [self.get_name_prefix(), self.os] + self.gen_build_env_parms()
if nightly:
parts.append("nightly")
if self.libtorch_variant:
parts.append(self.libtorch_variant)
if not self.smoke:
parts.append(build_or_test)
joined = "_".join(parts)
return joined.replace(".", "_")
def gen_workflow_job(self, phase, upload_phase_dependency=None, nightly=False):
job_def = OrderedDict()
job_def["name"] = self.gen_build_name(phase, nightly)
job_def["build_environment"] = miniutils.quote(
" ".join(self.gen_build_env_parms())
)
if self.smoke:
job_def["requires"] = [
"update_s3_htmls",
]
job_def["filters"] = branch_filters.gen_filter_dict(
branches_list=["postnightly"],
)
else:
filter_branch = r"/.*/"
job_def["filters"] = branch_filters.gen_filter_dict(
branches_list=[filter_branch],
tags_list=[branch_filters.RC_PATTERN],
)
if self.libtorch_variant:
job_def["libtorch_variant"] = miniutils.quote(self.libtorch_variant)
if phase == "test":
if not self.smoke:
job_def["requires"] = [self.gen_build_name("build", nightly)]
if not (self.smoke and self.os == "macos") and self.os != "windows":
job_def["docker_image"] = self.gen_docker_image()
# FIXME: this only works for CUDA, not ROCm
if self.os != "windows" and self.gpu_version:
job_def["use_cuda_docker_runtime"] = miniutils.quote("1")
else:
if self.os == "linux" and phase != "upload":
job_def["docker_image"] = self.gen_docker_image()
if phase == "test":
if self.gpu_version:
if self.os == "windows":
job_def["executor"] = "windows-with-nvidia-gpu"
else:
job_def["resource_class"] = "gpu.medium"
os_name = miniutils.override(self.os, {"macos": "mac"})
job_name = "_".join([self.get_name_prefix(), os_name, phase])
return {job_name: job_def}
def gen_upload_job(self, phase, requires_dependency):
"""Generate binary_upload job for configuration
Output looks similar to:
- binary_upload:
name: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_upload
context: org-member
requires: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_test
filters:
branches:
only:
- nightly
tags:
only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
package_type: manywheel
upload_subfolder: cu113
"""
return {
"binary_upload": OrderedDict(
{
"name": self.gen_build_name(phase, nightly=True),
"context": "org-member",
"requires": [
self.gen_build_name(requires_dependency, nightly=True)
],
"filters": branch_filters.gen_filter_dict(
branches_list=["nightly"],
tags_list=[branch_filters.RC_PATTERN],
),
"package_type": self.pydistro,
"upload_subfolder": binary_build_data.get_processor_arch_name(
self.gpu_version,
),
}
)
}
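A minimal sketch of the derived names, using assumed constructor values rather than a real config from this commit:

# Hypothetical illustration of Conf's derived names; argument values are assumed.
c = Conf(
    os="linux",
    gpu_version="cuda113",
    pydistro="manywheel",
    parms=["3.7m"],
    smoke=False,
    libtorch_variant=None,
    gcc_config_variant="devtoolset7",
    libtorch_config_variant=None,
)
c.gen_build_env_parms()   # ["manywheel", "3.7m", "cu113", "devtoolset7"]
c.gen_build_name("build", nightly=True)
# -> "binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_build"
c.gen_docker_image()      # -> '"pytorch/manylinux-cuda113"'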
def get_root(smoke, name):
return binary_build_data.TopLevelNode(
name,
binary_build_data.CONFIG_TREE_DATA,
smoke,
)
def gen_build_env_list(smoke):
root = get_root(smoke, "N/A")
config_list = conf_tree.dfs(root)
newlist = []
for c in config_list:
conf = Conf(
c.find_prop("os_name"),
c.find_prop("gpu"),
c.find_prop("package_format"),
[c.find_prop("pyver")],
c.find_prop("smoke")
and not (c.find_prop("os_name") == "macos_arm64"), # don't test arm64
c.find_prop("libtorch_variant"),
c.find_prop("gcc_config_variant"),
c.find_prop("libtorch_config_variant"),
)
newlist.append(conf)
return newlist
def predicate_exclude_macos(config):
return config.os == "linux" or config.os == "windows"
def get_nightly_uploads():
configs = gen_build_env_list(False)
mylist = []
for conf in configs:
phase_dependency = "test" if predicate_exclude_macos(conf) else "build"
mylist.append(conf.gen_upload_job("upload", phase_dependency))
return mylist
def get_post_upload_jobs():
return [
{
"update_s3_htmls": {
"name": "update_s3_htmls",
"context": "org-member",
"filters": branch_filters.gen_filter_dict(
branches_list=["postnightly"],
),
},
},
]
def get_nightly_tests():
configs = gen_build_env_list(False)
filtered_configs = filter(predicate_exclude_macos, configs)
tests = []
for conf_options in filtered_configs:
yaml_item = conf_options.gen_workflow_job("test", nightly=True)
tests.append(yaml_item)
return tests
def get_jobs(toplevel_key, smoke):
jobs_list = []
configs = gen_build_env_list(smoke)
phase = "build" if toplevel_key == "binarybuilds" else "test"
for build_config in configs:
# don't test for macos_arm64 as it's cross compiled
if phase != "test" or build_config.os != "macos_arm64":
jobs_list.append(build_config.gen_workflow_job(phase, nightly=True))
return jobs_list
def get_binary_build_jobs():
return get_jobs("binarybuilds", False)
def get_binary_smoke_test_jobs():
return get_jobs("binarysmoketests", True)

View File

@ -0,0 +1,19 @@
PHASES = ["build", "test"]
CUDA_VERSIONS = [
"102",
"113",
"116",
"117",
]
ROCM_VERSIONS = [
"4.3.1",
"4.5.2",
]
ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
GPU_VERSIONS = [None] + ["cuda" + v for v in CUDA_VERSIONS] + ROCM_VERSION_LABELS
STANDARD_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
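Spelled out, the composed GPU list is:

# Expansion of the comprehensions above, shown for clarity.
assert GPU_VERSIONS == (
    [None]
    + ["cuda102", "cuda113", "cuda116", "cuda117"]
    + ["rocm4.3.1", "rocm4.5.2"]
)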

View File

@ -0,0 +1,296 @@
from cimodel.lib.conf_tree import ConfigNode
CONFIG_TREE_DATA = []
def get_major_pyver(dotted_version):
parts = dotted_version.split(".")
return "py" + parts[0]
class TreeConfigNode(ConfigNode):
def __init__(self, parent, node_name, subtree):
super().__init__(parent, self.modify_label(node_name))
self.subtree = subtree
self.init2(node_name)
def modify_label(self, label):
return label
def init2(self, node_name):
pass
def get_children(self):
return [self.child_constructor()(self, k, v) for (k, v) in self.subtree]
class TopLevelNode(TreeConfigNode):
def __init__(self, node_name, subtree):
super().__init__(None, node_name, subtree)
# noinspection PyMethodMayBeStatic
def child_constructor(self):
return DistroConfigNode
class DistroConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["distro_name"] = node_name
def child_constructor(self):
distro = self.find_prop("distro_name")
next_nodes = {
"xenial": XenialCompilerConfigNode,
"bionic": BionicCompilerConfigNode,
}
return next_nodes[distro]
class PyVerConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["pyver"] = node_name
self.props["abbreviated_pyver"] = get_major_pyver(node_name)
if node_name == "3.9":
self.props["abbreviated_pyver"] = "py3.9"
# noinspection PyMethodMayBeStatic
def child_constructor(self):
return ExperimentalFeatureConfigNode
class ExperimentalFeatureConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["experimental_feature"] = node_name
def child_constructor(self):
experimental_feature = self.find_prop("experimental_feature")
next_nodes = {
"asan": AsanConfigNode,
"xla": XlaConfigNode,
"mps": MPSConfigNode,
"vulkan": VulkanConfigNode,
"parallel_tbb": ParallelTBBConfigNode,
"crossref": CrossRefConfigNode,
"dynamo": DynamoConfigNode,
"parallel_native": ParallelNativeConfigNode,
"onnx": ONNXConfigNode,
"libtorch": LibTorchConfigNode,
"important": ImportantConfigNode,
"build_only": BuildOnlyConfigNode,
"shard_test": ShardTestConfigNode,
"cuda_gcc_override": CudaGccOverrideConfigNode,
"pure_torch": PureTorchConfigNode,
"slow_gradcheck": SlowGradcheckConfigNode,
}
return next_nodes[experimental_feature]
class SlowGradcheckConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["is_slow_gradcheck"] = True
def child_constructor(self):
return ExperimentalFeatureConfigNode
class PureTorchConfigNode(TreeConfigNode):
def modify_label(self, label):
return "PURE_TORCH=" + str(label)
def init2(self, node_name):
self.props["is_pure_torch"] = node_name
def child_constructor(self):
return ImportantConfigNode
class XlaConfigNode(TreeConfigNode):
def modify_label(self, label):
return "XLA=" + str(label)
def init2(self, node_name):
self.props["is_xla"] = node_name
def child_constructor(self):
return ImportantConfigNode
class MPSConfigNode(TreeConfigNode):
def modify_label(self, label):
return "MPS=" + str(label)
def init2(self, node_name):
self.props["is_mps"] = node_name
def child_constructor(self):
return ImportantConfigNode
class AsanConfigNode(TreeConfigNode):
def modify_label(self, label):
return "Asan=" + str(label)
def init2(self, node_name):
self.props["is_asan"] = node_name
def child_constructor(self):
return ExperimentalFeatureConfigNode
class ONNXConfigNode(TreeConfigNode):
def modify_label(self, label):
return "Onnx=" + str(label)
def init2(self, node_name):
self.props["is_onnx"] = node_name
def child_constructor(self):
return ImportantConfigNode
class VulkanConfigNode(TreeConfigNode):
def modify_label(self, label):
return "Vulkan=" + str(label)
def init2(self, node_name):
self.props["is_vulkan"] = node_name
def child_constructor(self):
return ImportantConfigNode
class ParallelTBBConfigNode(TreeConfigNode):
def modify_label(self, label):
return "PARALLELTBB=" + str(label)
def init2(self, node_name):
self.props["parallel_backend"] = "paralleltbb"
def child_constructor(self):
return ImportantConfigNode
class CrossRefConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["is_crossref"] = node_name
def child_constructor(self):
return ImportantConfigNode
class DynamoConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["is_dynamo"] = node_name
def child_constructor(self):
return ImportantConfigNode
class ParallelNativeConfigNode(TreeConfigNode):
def modify_label(self, label):
return "PARALLELNATIVE=" + str(label)
def init2(self, node_name):
self.props["parallel_backend"] = "parallelnative"
def child_constructor(self):
return ImportantConfigNode
class LibTorchConfigNode(TreeConfigNode):
def modify_label(self, label):
return "BUILD_TEST_LIBTORCH=" + str(label)
def init2(self, node_name):
self.props["is_libtorch"] = node_name
def child_constructor(self):
return ExperimentalFeatureConfigNode
class CudaGccOverrideConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["cuda_gcc_override"] = node_name
def child_constructor(self):
return ExperimentalFeatureConfigNode
class BuildOnlyConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["build_only"] = node_name
def child_constructor(self):
return ExperimentalFeatureConfigNode
class ShardTestConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["shard_test"] = node_name
def child_constructor(self):
return ImportantConfigNode
class ImportantConfigNode(TreeConfigNode):
def modify_label(self, label):
return "IMPORTANT=" + str(label)
def init2(self, node_name):
self.props["is_important"] = node_name
def get_children(self):
return []
class XenialCompilerConfigNode(TreeConfigNode):
def modify_label(self, label):
return label or "<unspecified>"
def init2(self, node_name):
self.props["compiler_name"] = node_name
# noinspection PyMethodMayBeStatic
def child_constructor(self):
return (
XenialCompilerVersionConfigNode
if self.props["compiler_name"]
else PyVerConfigNode
)
class BionicCompilerConfigNode(TreeConfigNode):
def modify_label(self, label):
return label or "<unspecified>"
def init2(self, node_name):
self.props["compiler_name"] = node_name
# noinspection PyMethodMayBeStatic
def child_constructor(self):
return (
BionicCompilerVersionConfigNode
if self.props["compiler_name"]
else PyVerConfigNode
)
class XenialCompilerVersionConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["compiler_version"] = node_name
# noinspection PyMethodMayBeStatic
def child_constructor(self):
return PyVerConfigNode
class BionicCompilerVersionConfigNode(TreeConfigNode):
def init2(self, node_name):
self.props["compiler_version"] = node_name
# noinspection PyMethodMayBeStatic
def child_constructor(self):
return PyVerConfigNode
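CONFIG_TREE_DATA is empty in this commit; purely as an illustration (names assumed, not from this commit), an entry would nest distro -> compiler -> compiler version -> python version -> experimental feature to match the node classes above:

# Hypothetical shape of a CONFIG_TREE_DATA entry; values are assumed for
# illustration and follow the (node_name, subtree) convention used above.
EXAMPLE_TREE_DATA = [
    ("bionic", [                      # DistroConfigNode
        ("gcc", [                     # BionicCompilerConfigNode
            ("9", [                   # BionicCompilerVersionConfigNode
                ("3.8", [             # PyVerConfigNode
                    ("important", [   # ExperimentalFeatureConfigNode
                        (True, []),   # ImportantConfigNode leaf
                    ]),
                ]),
            ]),
        ]),
    ]),
]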

View File

@ -0,0 +1,382 @@
from collections import OrderedDict
from dataclasses import dataclass, field
from typing import List, Optional
import cimodel.data.dimensions as dimensions
import cimodel.lib.conf_tree as conf_tree
import cimodel.lib.miniutils as miniutils
from cimodel.data.pytorch_build_data import CONFIG_TREE_DATA, TopLevelNode
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
from cimodel.data.simple.util.docker_constants import gen_docker_image
@dataclass
class Conf:
distro: str
parms: List[str]
parms_list_ignored_for_docker_image: Optional[List[str]] = None
pyver: Optional[str] = None
cuda_version: Optional[str] = None
rocm_version: Optional[str] = None
# TODO expand this to cover all the USE_* that we want to test for
# tensorrt, leveldb, lmdb, redis, opencv, mkldnn, ideep, etc.
# (from https://github.com/pytorch/pytorch/pull/17323#discussion_r259453608)
is_xla: bool = False
is_vulkan: bool = False
is_pure_torch: bool = False
restrict_phases: Optional[List[str]] = None
gpu_resource: Optional[str] = None
dependent_tests: List = field(default_factory=list)
parent_build: Optional["Conf"] = None
is_libtorch: bool = False
is_important: bool = False
parallel_backend: Optional[str] = None
build_only: bool = False
@staticmethod
def is_test_phase(phase):
return "test" in phase
# TODO: Eliminate the special casing for docker paths
# In the short term, we *will* need to support special casing as docker images are merged for caffe2 and pytorch
def get_parms(self, for_docker):
leading = []
# We just don't run non-important jobs on pull requests;
# previously we also named them in a way to make it obvious
# if self.is_important and not for_docker:
# leading.append("AAA")
leading.append("pytorch")
if self.is_xla and not for_docker:
leading.append("xla")
if self.is_vulkan and not for_docker:
leading.append("vulkan")
if self.is_libtorch and not for_docker:
leading.append("libtorch")
if self.is_pure_torch and not for_docker:
leading.append("pure_torch")
if self.parallel_backend is not None and not for_docker:
leading.append(self.parallel_backend)
cuda_parms = []
if self.cuda_version:
cudnn = "cudnn8" if self.cuda_version.startswith("11.") else "cudnn7"
cuda_parms.extend(["cuda" + self.cuda_version, cudnn])
if self.rocm_version:
cuda_parms.extend([f"rocm{self.rocm_version}"])
result = leading + ["linux", self.distro] + cuda_parms + self.parms
if not for_docker and self.parms_list_ignored_for_docker_image is not None:
result = result + self.parms_list_ignored_for_docker_image
return result
def gen_docker_image_path(self):
parms_source = self.parent_build or self
base_build_env_name = "-".join(parms_source.get_parms(True))
image_name, _ = gen_docker_image(base_build_env_name)
return miniutils.quote(image_name)
def gen_docker_image_requires(self):
parms_source = self.parent_build or self
base_build_env_name = "-".join(parms_source.get_parms(True))
_, requires = gen_docker_image(base_build_env_name)
return miniutils.quote(requires)
def get_build_job_name_pieces(self, build_or_test):
return self.get_parms(False) + [build_or_test]
def gen_build_name(self, build_or_test):
return (
("_".join(map(str, self.get_build_job_name_pieces(build_or_test))))
.replace(".", "_")
.replace("-", "_")
)
def get_dependents(self):
return self.dependent_tests or []
def gen_workflow_params(self, phase):
parameters = OrderedDict()
build_job_name_pieces = self.get_build_job_name_pieces(phase)
build_env_name = "-".join(map(str, build_job_name_pieces))
parameters["build_environment"] = miniutils.quote(build_env_name)
parameters["docker_image"] = self.gen_docker_image_path()
if Conf.is_test_phase(phase) and self.gpu_resource:
parameters["use_cuda_docker_runtime"] = miniutils.quote("1")
if Conf.is_test_phase(phase):
resource_class = "large"
if self.gpu_resource:
resource_class = "gpu." + self.gpu_resource
if self.rocm_version is not None:
resource_class = "pytorch/amd-gpu"
parameters["resource_class"] = resource_class
if phase == "build" and self.rocm_version is not None:
parameters["resource_class"] = "xlarge"
if hasattr(self, "filters"):
parameters["filters"] = self.filters
if self.build_only:
parameters["build_only"] = miniutils.quote(str(int(True)))
return parameters
def gen_workflow_job(self, phase):
job_def = OrderedDict()
job_def["name"] = self.gen_build_name(phase)
if Conf.is_test_phase(phase):
# TODO When merging the caffe2 and pytorch jobs, it might be convenient for a while to make a
# caffe2 test job dependent on a pytorch build job. This way we could quickly dedup the repeated
# build of pytorch in the caffe2 build job, and just run the caffe2 tests off of a completed
# pytorch build job (from https://github.com/pytorch/pytorch/pull/17323#discussion_r259452641)
dependency_build = self.parent_build or self
job_def["requires"] = [dependency_build.gen_build_name("build")]
job_name = "pytorch_linux_test"
else:
job_name = "pytorch_linux_build"
job_def["requires"] = [self.gen_docker_image_requires()]
if not self.is_important:
job_def["filters"] = gen_filter_dict()
job_def.update(self.gen_workflow_params(phase))
return {job_name: job_def}
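A minimal sketch of the naming scheme, with assumed field values:

# Sketch (field values assumed): how a plain GCC config renders its job name.
c = Conf(distro="xenial", parms=["py3.7", "gcc5.4"])
c.get_parms(for_docker=False)  # ["pytorch", "linux", "xenial", "py3.7", "gcc5.4"]
c.gen_build_name("build")
# -> "pytorch_linux_xenial_py3_7_gcc5_4_build"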
# TODO This is a hack to special case some configs just for the workflow list
class HiddenConf:
def __init__(self, name, parent_build=None, filters=None):
self.name = name
self.parent_build = parent_build
self.filters = filters
def gen_workflow_job(self, phase):
return {
self.gen_build_name(phase): {
"requires": [self.parent_build.gen_build_name("build")],
"filters": self.filters,
}
}
def gen_build_name(self, _):
return self.name
class DocPushConf:
def __init__(self, name, parent_build=None, branch="master"):
self.name = name
self.parent_build = parent_build
self.branch = branch
def gen_workflow_job(self, phase):
return {
"pytorch_doc_push": {
"name": self.name,
"branch": self.branch,
"requires": [self.parent_build],
"context": "org-member",
"filters": gen_filter_dict(
branches_list=["nightly"], tags_list=RC_PATTERN
),
}
}
def gen_docs_configs(xenial_parent_config):
configs = []
configs.append(
HiddenConf(
"pytorch_python_doc_build",
parent_build=xenial_parent_config,
filters=gen_filter_dict(
branches_list=["master", "main", "nightly"], tags_list=RC_PATTERN
),
)
)
configs.append(
DocPushConf(
"pytorch_python_doc_push",
parent_build="pytorch_python_doc_build",
branch="site",
)
)
configs.append(
HiddenConf(
"pytorch_cpp_doc_build",
parent_build=xenial_parent_config,
filters=gen_filter_dict(
branches_list=["master", "main", "nightly"], tags_list=RC_PATTERN
),
)
)
configs.append(
DocPushConf(
"pytorch_cpp_doc_push",
parent_build="pytorch_cpp_doc_build",
branch="master",
)
)
return configs
def get_root():
return TopLevelNode("PyTorch Builds", CONFIG_TREE_DATA)
def gen_tree():
root = get_root()
configs_list = conf_tree.dfs(root)
return configs_list
def instantiate_configs(only_slow_gradcheck):
config_list = []
root = get_root()
found_configs = conf_tree.dfs(root)
for fc in found_configs:
restrict_phases = None
distro_name = fc.find_prop("distro_name")
compiler_name = fc.find_prop("compiler_name")
compiler_version = fc.find_prop("compiler_version")
is_xla = fc.find_prop("is_xla") or False
is_asan = fc.find_prop("is_asan") or False
is_crossref = fc.find_prop("is_crossref") or False
is_dynamo = fc.find_prop("is_dynamo") or False
is_onnx = fc.find_prop("is_onnx") or False
is_pure_torch = fc.find_prop("is_pure_torch") or False
is_vulkan = fc.find_prop("is_vulkan") or False
is_slow_gradcheck = fc.find_prop("is_slow_gradcheck") or False
parms_list_ignored_for_docker_image = []
if only_slow_gradcheck ^ is_slow_gradcheck:
continue
python_version = None
if compiler_name == "cuda" or compiler_name == "android":
python_version = fc.find_prop("pyver")
parms_list = [fc.find_prop("abbreviated_pyver")]
else:
parms_list = ["py" + fc.find_prop("pyver")]
cuda_version = None
rocm_version = None
if compiler_name == "cuda":
cuda_version = fc.find_prop("compiler_version")
elif compiler_name == "rocm":
rocm_version = fc.find_prop("compiler_version")
restrict_phases = ["build", "test1", "test2", "caffe2_test"]
elif compiler_name == "android":
android_ndk_version = fc.find_prop("compiler_version")
# TODO: do we need clang to compile host binaries like protoc?
parms_list.append("clang5")
parms_list.append("android-ndk-" + android_ndk_version)
android_abi = fc.find_prop("android_abi")
parms_list_ignored_for_docker_image.append(android_abi)
restrict_phases = ["build"]
elif compiler_name:
gcc_version = compiler_name + (fc.find_prop("compiler_version") or "")
parms_list.append(gcc_version)
if is_asan:
parms_list.append("asan")
python_version = fc.find_prop("pyver")
parms_list[0] = fc.find_prop("abbreviated_pyver")
if is_crossref:
parms_list_ignored_for_docker_image.append("crossref")
if is_dynamo:
parms_list_ignored_for_docker_image.append("dynamo")
if is_onnx:
parms_list.append("onnx")
python_version = fc.find_prop("pyver")
parms_list[0] = fc.find_prop("abbreviated_pyver")
restrict_phases = ["build", "ort_test1", "ort_test2"]
if cuda_version:
cuda_gcc_version = fc.find_prop("cuda_gcc_override") or "gcc7"
parms_list.append(cuda_gcc_version)
is_libtorch = fc.find_prop("is_libtorch") or False
is_important = fc.find_prop("is_important") or False
parallel_backend = fc.find_prop("parallel_backend") or None
build_only = fc.find_prop("build_only") or False
shard_test = fc.find_prop("shard_test") or False
# TODO: fix pure_torch python test packaging issue.
if shard_test:
restrict_phases = ["build"] if restrict_phases is None else restrict_phases
restrict_phases.extend(["test1", "test2"])
if build_only or is_pure_torch:
restrict_phases = ["build"]
if is_slow_gradcheck:
parms_list_ignored_for_docker_image.append("old")
parms_list_ignored_for_docker_image.append("gradcheck")
gpu_resource = None
if cuda_version and cuda_version != "10":
gpu_resource = "medium"
c = Conf(
distro_name,
parms_list,
parms_list_ignored_for_docker_image,
python_version,
cuda_version,
rocm_version,
is_xla,
is_vulkan,
is_pure_torch,
restrict_phases,
gpu_resource,
is_libtorch=is_libtorch,
is_important=is_important,
parallel_backend=parallel_backend,
build_only=build_only,
)
# run docs builds on "pytorch-linux-xenial-py3.7-gcc5.4". Docs builds
# should run on a CPU-only build that runs on all PRs.
# XXX should this be updated to a more modern build?
if (
distro_name == "xenial"
and fc.find_prop("pyver") == "3.7"
and cuda_version is None
and parallel_backend is None
and not is_vulkan
and not is_pure_torch
and compiler_name == "gcc"
and fc.find_prop("compiler_version") == "5.4"
):
c.filters = gen_filter_dict(branches_list=r"/.*/", tags_list=RC_PATTERN)
c.dependent_tests = gen_docs_configs(c)
config_list.append(c)
return config_list
def get_workflow_jobs(only_slow_gradcheck=False):
config_list = instantiate_configs(only_slow_gradcheck)
x = []
for conf_options in config_list:
phases = conf_options.restrict_phases or dimensions.PHASES
for phase in phases:
# TODO why does this not have a test?
if Conf.is_test_phase(phase) and conf_options.cuda_version == "10":
continue
x.append(conf_options.gen_workflow_job(phase))
# TODO convert to recursion
for conf in conf_options.get_dependents():
x.append(conf.gen_workflow_job("test"))
return x

View File

@ -0,0 +1,39 @@
from collections import OrderedDict
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
from cimodel.lib.miniutils import quote
# NOTE: All hardcoded docker image builds have been migrated to GHA
IMAGE_NAMES = []
# This entry should be an element from the list above
# This should contain the image matching the "slow_gradcheck" entry in
# pytorch_build_data.py
SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
def get_workflow_jobs(images=IMAGE_NAMES, only_slow_gradcheck=False):
"""Generates a list of docker image build definitions"""
ret = []
for image_name in images:
if image_name.startswith("docker-"):
# lstrip() strips a set of characters, not a prefix; slice the prefix off instead
image_name = image_name[len("docker-") :]
if only_slow_gradcheck and image_name != SLOW_GRADCHECK_IMAGE_NAME:
continue
parameters = OrderedDict(
{
"name": quote(f"docker-{image_name}"),
"image_name": quote(image_name),
}
)
if image_name == "pytorch-linux-xenial-py3.7-gcc5.4":
# pushing documentation on tags requires CircleCI to also
# build all the dependencies on tags, including this docker image
parameters["filters"] = gen_filter_dict(
branches_list=r"/.*/", tags_list=RC_PATTERN
)
ret.append(OrderedDict({"docker_build_job": parameters}))
return ret

View File

@ -0,0 +1,100 @@
import cimodel.lib.miniutils as miniutils
from cimodel.data.simple.util.branch_filters import gen_filter_dict_exclude
from cimodel.data.simple.util.versions import MultiPartVersion
XCODE_VERSION = MultiPartVersion([12, 5, 1])
class ArchVariant:
def __init__(self, name, custom_build_name=""):
self.name = name
self.custom_build_name = custom_build_name
def render(self):
extra_parts = (
[self.custom_build_name] if len(self.custom_build_name) > 0 else []
)
return "-".join([self.name] + extra_parts).replace("_", "-")
def get_platform(arch_variant_name):
return "SIMULATOR" if arch_variant_name == "x86_64" else "OS"
class IOSJob:
def __init__(
self, xcode_version, arch_variant, is_org_member_context=True, extra_props=None
):
self.xcode_version = xcode_version
self.arch_variant = arch_variant
self.is_org_member_context = is_org_member_context
self.extra_props = extra_props
def gen_name_parts(self):
version_parts = self.xcode_version.render_dots_or_parts("-")
build_variant_suffix = self.arch_variant.render()
return (
[
"ios",
]
+ version_parts
+ [
build_variant_suffix,
]
)
def gen_job_name(self):
return "-".join(self.gen_name_parts())
def gen_tree(self):
platform_name = get_platform(self.arch_variant.name)
props_dict = {
"name": self.gen_job_name(),
"build_environment": self.gen_job_name(),
"ios_arch": self.arch_variant.name,
"ios_platform": platform_name,
}
if self.is_org_member_context:
props_dict["context"] = "org-member"
if self.extra_props:
props_dict.update(self.extra_props)
props_dict["filters"] = gen_filter_dict_exclude()
return [{"pytorch_ios_build": props_dict}]
WORKFLOW_DATA = [
IOSJob(
XCODE_VERSION,
ArchVariant("x86_64"),
is_org_member_context=False,
extra_props={"lite_interpreter": miniutils.quote(str(int(True)))},
),
# IOSJob(XCODE_VERSION, ArchVariant("arm64"), extra_props={
# "lite_interpreter": miniutils.quote(str(int(True)))}),
# IOSJob(XCODE_VERSION, ArchVariant("arm64", "metal"), extra_props={
# "use_metal": miniutils.quote(str(int(True))),
# "lite_interpreter": miniutils.quote(str(int(True)))}),
# IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom-ops"), extra_props={
# "op_list": "mobilenetv2.yaml",
# "lite_interpreter": miniutils.quote(str(int(True)))}),
IOSJob(
XCODE_VERSION,
ArchVariant("x86_64", "coreml"),
is_org_member_context=False,
extra_props={
"use_coreml": miniutils.quote(str(int(True))),
"lite_interpreter": miniutils.quote(str(int(True))),
},
),
# IOSJob(XCODE_VERSION, ArchVariant("arm64", "coreml"), extra_props={
# "use_coreml": miniutils.quote(str(int(True))),
# "lite_interpreter": miniutils.quote(str(int(True)))}),
]
def get_workflow_jobs():
return [item.gen_tree() for item in WORKFLOW_DATA]
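For reference, the job names the two active WORKFLOW_DATA entries above render to (derived by hand from gen_name_parts):

# Sketch: job names for the two uncommented configs above.
IOSJob(XCODE_VERSION, ArchVariant("x86_64")).gen_job_name()
# -> "ios-12-5-1-x86-64"
IOSJob(XCODE_VERSION, ArchVariant("x86_64", "coreml")).gen_job_name()
# -> "ios-12-5-1-x86-64-coreml"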

View File

@ -0,0 +1,54 @@
class MacOsJob:
def __init__(self, os_version, is_build=False, is_test=False, extra_props=tuple()):
# extra_props is a tuple because mutable data structures are not
# recommended as argument defaults.
self.os_version = os_version
self.is_build = is_build
self.is_test = is_test
self.extra_props = dict(extra_props)
def gen_tree(self):
non_phase_parts = ["pytorch", "macos", self.os_version, "py3"]
extra_name_list = [name for name, exist in self.extra_props.items() if exist]
full_job_name_list = (
non_phase_parts
+ extra_name_list
+ [
"build" if self.is_build else None,
"test" if self.is_test else None,
]
)
full_job_name = "_".join(list(filter(None, full_job_name_list)))
test_build_dependency = "_".join(non_phase_parts + ["build"])
extra_dependencies = [test_build_dependency] if self.is_test else []
job_dependencies = extra_dependencies
# Yes, we name the job after itself; it needs a non-empty value here
# for the YAML output to work.
props_dict = {"requires": job_dependencies, "name": full_job_name}
return [{full_job_name: props_dict}]
WORKFLOW_DATA = [
MacOsJob("10_15", is_build=True),
MacOsJob("10_13", is_build=True),
MacOsJob(
"10_13",
is_build=False,
is_test=True,
),
MacOsJob(
"10_13",
is_build=True,
is_test=True,
extra_props=tuple({"lite_interpreter": True}.items()),
),
]
def get_workflow_jobs():
return [item.gen_tree() for item in WORKFLOW_DATA]
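A minimal sketch of the names the WORKFLOW_DATA entries above expand to:

# Sketch: each job dict is keyed by its full job name.
jobs = [item.gen_tree()[0] for item in WORKFLOW_DATA]
names = [next(iter(j)) for j in jobs]
# names == ["pytorch_macos_10_15_py3_build",
#           "pytorch_macos_10_13_py3_build",
#           "pytorch_macos_10_13_py3_test",
#           "pytorch_macos_10_13_py3_lite_interpreter_build_test"]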

View File

@ -0,0 +1,51 @@
"""
PyTorch Mobile PR builds (use linux host toolchain + mobile build options)
"""
import cimodel.data.simple.util.branch_filters
import cimodel.lib.miniutils as miniutils
class MobileJob:
def __init__(
self, docker_image, docker_requires, variant_parts, is_master_only=False
):
self.docker_image = docker_image
self.docker_requires = docker_requires
self.variant_parts = variant_parts
self.is_master_only = is_master_only
def gen_tree(self):
non_phase_parts = [
"pytorch",
"linux",
"xenial",
"py3",
"clang5",
"mobile",
] + self.variant_parts
full_job_name = "_".join(non_phase_parts)
build_env_name = "-".join(non_phase_parts)
props_dict = {
"build_environment": build_env_name,
"build_only": miniutils.quote(str(int(True))),
"docker_image": self.docker_image,
"requires": self.docker_requires,
"name": full_job_name,
}
if self.is_master_only:
props_dict[
"filters"
] = cimodel.data.simple.util.branch_filters.gen_filter_dict()
return [{"pytorch_linux_build": props_dict}]
WORKFLOW_DATA = []
def get_workflow_jobs():
return [item.gen_tree() for item in WORKFLOW_DATA]
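WORKFLOW_DATA is empty in this commit; a hypothetical entry (docker constants assumed from docker_constants.py, shown later in this diff) would render like this:

# Hypothetical illustration only; the variant parts are assumed.
from cimodel.data.simple.util.docker_constants import (
    DOCKER_IMAGE_ASAN,
    DOCKER_REQUIREMENT_ASAN,
)

example_job = MobileJob(
    DOCKER_IMAGE_ASAN,
    [DOCKER_REQUIREMENT_ASAN],
    ["custom", "build", "dynamic"],
)
# example_job.gen_tree()[0] is keyed by
# "pytorch_linux_xenial_py3_clang5_mobile_custom_build_dynamic"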

View File

@ -0,0 +1,96 @@
import cimodel.data.simple.ios_definitions as ios_definitions
import cimodel.lib.miniutils as miniutils
class IOSNightlyJob:
def __init__(self, variant, is_full_jit=False, is_upload=False):
self.variant = variant
self.is_full_jit = is_full_jit
self.is_upload = is_upload
def get_phase_name(self):
return "upload" if self.is_upload else "build"
def get_common_name_pieces(self, sep):
extra_name_suffix = [self.get_phase_name()] if self.is_upload else []
extra_name = ["full_jit"] if self.is_full_jit else []
common_name_pieces = (
[
"ios",
]
+ extra_name
+ ios_definitions.XCODE_VERSION.render_dots_or_parts(sep)
+ [
"nightly",
self.variant,
"build",
]
+ extra_name_suffix
)
return common_name_pieces
def gen_job_name(self):
return "_".join(["pytorch"] + self.get_common_name_pieces(None))
def gen_tree(self):
build_configs = BUILD_CONFIGS_FULL_JIT if self.is_full_jit else BUILD_CONFIGS
extra_requires = (
[x.gen_job_name() for x in build_configs] if self.is_upload else []
)
props_dict = {
"build_environment": "-".join(
["libtorch"] + self.get_common_name_pieces(".")
),
"requires": extra_requires,
"context": "org-member",
"filters": {"branches": {"only": "nightly"}},
}
if not self.is_upload:
props_dict["ios_arch"] = self.variant
props_dict["ios_platform"] = ios_definitions.get_platform(self.variant)
props_dict["name"] = self.gen_job_name()
props_dict["use_metal"] = miniutils.quote(str(int(True)))
props_dict["use_coreml"] = miniutils.quote(str(int(True)))
if self.is_full_jit:
props_dict["lite_interpreter"] = miniutils.quote(str(int(False)))
template_name = "_".join(
[
"binary",
"ios",
self.get_phase_name(),
]
)
return [{template_name: props_dict}]
BUILD_CONFIGS = [
IOSNightlyJob("x86_64"),
IOSNightlyJob("arm64"),
]
BUILD_CONFIGS_FULL_JIT = [
IOSNightlyJob("x86_64", is_full_jit=True),
IOSNightlyJob("arm64", is_full_jit=True),
]
WORKFLOW_DATA = (
BUILD_CONFIGS
+ BUILD_CONFIGS_FULL_JIT
+ [
IOSNightlyJob("binary", is_full_jit=False, is_upload=True),
IOSNightlyJob("binary", is_full_jit=True, is_upload=True),
]
)
def get_workflow_jobs():
return [item.gen_tree() for item in WORKFLOW_DATA]
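A minimal sketch of the nightly job names (derived by hand from get_common_name_pieces):

# Sketch: names produced for the nightly iOS build configs above.
IOSNightlyJob("x86_64").gen_job_name()
# -> "pytorch_ios_12_5_1_nightly_x86_64_build"
IOSNightlyJob("arm64", is_full_jit=True).gen_job_name()
# -> "pytorch_ios_full_jit_12_5_1_nightly_arm64_build"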

View File

@ -0,0 +1,36 @@
NON_PR_BRANCH_LIST = [
"main",
"master",
r"/ci-all\/.*/",
r"/release\/.*/",
]
PR_BRANCH_LIST = [
r"/gh\/.*\/head/",
r"/pull\/.*/",
]
RC_PATTERN = r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"
MAC_IOS_EXCLUSION_LIST = ["nightly", "postnightly"]
def gen_filter_dict(branches_list=NON_PR_BRANCH_LIST, tags_list=None):
"""Generates a filter dictionary for use with CircleCI's job filter"""
filter_dict = {
"branches": {
"only": branches_list,
},
}
if tags_list is not None:
filter_dict["tags"] = {"only": tags_list}
return filter_dict
def gen_filter_dict_exclude(branches_list=MAC_IOS_EXCLUSION_LIST):
return {
"branches": {
"ignore": branches_list,
},
}
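A minimal sketch of the dictionaries these helpers return with their defaults:

# Sketch of the returned filter dictionaries.
gen_filter_dict(tags_list=[RC_PATTERN])
# -> {"branches": {"only": ["main", "master",
#                           r"/ci-all\/.*/", r"/release\/.*/"]},
#     "tags": {"only": [r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"]}}
gen_filter_dict_exclude()
# -> {"branches": {"ignore": ["nightly", "postnightly"]}}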

View File

@ -0,0 +1,35 @@
AWS_DOCKER_HOST = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
def gen_docker_image(container_type):
return (
"/".join([AWS_DOCKER_HOST, "pytorch", container_type]),
f"docker-{container_type}",
)
def gen_docker_image_requires(image_name):
return [f"docker-{image_name}"]
DOCKER_IMAGE_BASIC, DOCKER_REQUIREMENT_BASE = gen_docker_image(
"pytorch-linux-xenial-py3.7-gcc5.4"
)
DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
"pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
)
DOCKER_IMAGE_GCC7, DOCKER_REQUIREMENT_GCC7 = gen_docker_image(
"pytorch-linux-xenial-py3.7-gcc7"
)
def gen_mobile_docker(specifier):
container_type = "pytorch-linux-xenial-py3-clang5-" + specifier
return gen_docker_image(container_type)
DOCKER_IMAGE_ASAN, DOCKER_REQUIREMENT_ASAN = gen_mobile_docker("asan")
DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK = gen_mobile_docker("android-ndk-r21e")
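Each helper returns an (image, requirement) pair; a minimal sketch:

# Sketch: the tuple unpacked by the module-level constants above.
image, req = gen_docker_image("pytorch-linux-xenial-py3.7-gcc5.4")
# image == "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4"
# req   == "docker-pytorch-linux-xenial-py3.7-gcc5.4"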

View File

@ -0,0 +1,36 @@
from typing import Optional
class MultiPartVersion:
def __init__(self, parts, prefix=""):
self.parts = parts
self.prefix = prefix
def prefixed_parts(self):
"""
Prepends the first element of the version list
with the prefix string.
"""
if self.parts:
return [self.prefix + str(self.parts[0])] + [
str(part) for part in self.parts[1:]
]
else:
return [self.prefix]
def render_dots_or_parts(self, sep: Optional[str] = None):
if sep is None:
return self.prefixed_parts()
else:
return [sep.join(self.prefixed_parts())]
class CudaVersion(MultiPartVersion):
def __init__(self, major, minor):
self.major = major
self.minor = minor
super().__init__([self.major, self.minor], "cuda")
def __str__(self):
return f"{self.major}.{self.minor}"
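A minimal sketch of the two rendering modes and the CUDA subclass:

# Sketch: the separator argument switches between parts and a joined string.
v = MultiPartVersion([12, 5, 1])
v.render_dots_or_parts(None)   # ["12", "5", "1"]  (separate parts)
v.render_dots_or_parts("-")    # ["12-5-1"]        (single joined part)
cu = CudaVersion(11, 3)
str(cu)                        # "11.3"
cu.render_dots_or_parts(".")   # ["cuda11.3"]      (prefix applied to first part)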

View File

@ -0,0 +1,111 @@
from dataclasses import dataclass, field
from typing import Dict, Optional
def X(val):
"""
Compact way to write a leaf node
"""
return val, []
def XImportant(name):
"""Compact way to write an important (run on PRs) leaf node"""
return (name, [("important", [X(True)])])
@dataclass
class Ver:
"""
Represents a product with a version number
"""
name: str
version: str = ""
def __str__(self):
return self.name + self.version
@dataclass
class ConfigNode:
parent: Optional["ConfigNode"]
node_name: str
props: Dict[str, str] = field(default_factory=dict)
def get_label(self):
return self.node_name
# noinspection PyMethodMayBeStatic
def get_children(self):
return []
def get_parents(self):
return (
(self.parent.get_parents() + [self.parent.get_label()])
if self.parent
else []
)
def get_depth(self):
return len(self.get_parents())
def get_node_key(self):
return "%".join(self.get_parents() + [self.get_label()])
def find_prop(self, propname, searched=None):
"""
Checks if its own dictionary has
the property, otherwise asks parent node.
"""
if searched is None:
searched = []
searched.append(self.node_name)
if propname in self.props:
return self.props[propname]
elif self.parent:
return self.parent.find_prop(propname, searched)
else:
# raise Exception('Property "%s" does not exist anywhere in the tree! Searched: %s' % (propname, searched))
return None
def dfs_recurse(
node,
leaf_callback=lambda x: None,
discovery_callback=lambda x, y, z: None,
child_callback=lambda x, y: None,
sibling_index=0,
sibling_count=1,
):
discovery_callback(node, sibling_index, sibling_count)
node_children = node.get_children()
if node_children:
for i, child in enumerate(node_children):
child_callback(node, child)
dfs_recurse(
child,
leaf_callback,
discovery_callback,
child_callback,
i,
len(node_children),
)
else:
leaf_callback(node)
def dfs(toplevel_config_node):
config_list = []
def leaf_callback(node):
config_list.append(node)
dfs_recurse(toplevel_config_node, leaf_callback)
return config_list
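A minimal usage sketch; the Wrapper subclass is hypothetical, since get_children() on the base class always returns an empty list and real trees override it:

# Hypothetical Wrapper subclass for illustration: find_prop() walks up the
# parent chain, and dfs() collects the leaves.
class Wrapper(ConfigNode):
    def __init__(self, parent, name, children=()):
        super().__init__(parent, name)
        self.children = list(children)

    def get_children(self):
        return self.children


root = Wrapper(None, "root")
root.props["os"] = "linux"
root.children = [Wrapper(root, "a"), Wrapper(root, "b")]

assert [n.get_label() for n in dfs(root)] == ["a", "b"]
assert root.children[0].find_prop("os") == "linux"  # inherited from the parent
assert root.children[0].get_node_key() == "root%a"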

View File

@ -0,0 +1,10 @@
def quote(s):
return sandwich('"', s)
def sandwich(bread, jam):
return bread + jam + bread
def override(word, substitutions):
return substitutions.get(word, word)
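Doctest-style one-liners for the three helpers:

assert quote("abc") == '"abc"'
assert sandwich("*", "abc") == "*abc*"
assert override("manywheel", {"manywheel": "manylinux"}) == "manylinux"
assert override("conda", {"manywheel": "manylinux"}) == "conda"  # no match -> unchanged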

View File

@ -0,0 +1,51 @@
from collections import OrderedDict
import cimodel.lib.miniutils as miniutils
LIST_MARKER = "- "
INDENTATION_WIDTH = 2
def is_dict(data):
return type(data) in [dict, OrderedDict]
def is_collection(data):
return is_dict(data) or type(data) is list
def render(fh, data, depth, is_list_member=False):
"""
PyYAML does not allow precise control over the quoting
behavior, especially for merge references.
Therefore, we use this custom YAML renderer.
"""
indentation = " " * INDENTATION_WIDTH * depth
if is_dict(data):
tuples = list(data.items())
if type(data) is not OrderedDict:
tuples.sort()
for i, (k, v) in enumerate(tuples):
if not v:
continue
# If this dict is itself a list member, the first key gets prefixed with a list marker
list_marker_prefix = LIST_MARKER if is_list_member and not i else ""
trailing_whitespace = "\n" if is_collection(v) else " "
fh.write(indentation + list_marker_prefix + k + ":" + trailing_whitespace)
render(fh, v, depth + 1 + int(is_list_member))
elif type(data) is list:
for v in data:
render(fh, v, depth, True)
else:
# use empty quotes to denote an empty string value instead of blank space
modified_data = miniutils.quote(data) if data == "" else data
list_member_prefix = indentation + LIST_MARKER if is_list_member else ""
fh.write(list_member_prefix + str(modified_data) + "\n")
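A minimal sketch of the output render() produces for a small job definition:

# Sketch: rendering an OrderedDict to an in-memory buffer.
import io
from collections import OrderedDict

buf = io.StringIO()
render(buf, OrderedDict(name="build", requires=["docker-foo", "setup"]), 0)
print(buf.getvalue())
# name: build
# requires:
#   - docker-foo
#   - setup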

1388 .circleci/config.yml generated Normal file

File diff suppressed because it is too large

41 .circleci/ensure-consistency.py Executable file
View File

@ -0,0 +1,41 @@
#!/usr/bin/env python3
import os
import subprocess
import sys
import tempfile
import generate_config_yml
CHECKED_IN_FILE = "config.yml"
REGENERATION_SCRIPT = "regenerate.sh"
PARENT_DIR = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
README_PATH = os.path.join(PARENT_DIR, "README.md")
ERROR_MESSAGE_TEMPLATE = """
The checked-in CircleCI "%s" file does not match what was generated by the scripts.
Please re-run the "%s" script in the "%s" directory and commit the result. See "%s" for more information.
"""
def check_consistency():
_, temp_filename = tempfile.mkstemp("-generated-config.yml")
with open(temp_filename, "w") as fh:
generate_config_yml.stitch_sources(fh)
try:
subprocess.check_call(["cmp", temp_filename, CHECKED_IN_FILE])
except subprocess.CalledProcessError:
sys.exit(
ERROR_MESSAGE_TEMPLATE
% (CHECKED_IN_FILE, REGENERATION_SCRIPT, PARENT_DIR, README_PATH)
)
finally:
os.remove(temp_filename)
if __name__ == "__main__":
check_consistency()

196 .circleci/generate_config_yml.py Executable file
View File

@ -0,0 +1,196 @@
#!/usr/bin/env python3
"""
This script is the source of truth for config.yml.
Please see README.md in this directory for details.
"""
import os
import shutil
import sys
from collections import namedtuple
import cimodel.data.simple.docker_definitions
import cimodel.data.simple.mobile_definitions
import cimodel.data.simple.nightly_ios
import cimodel.lib.miniutils as miniutils
import cimodel.lib.miniyaml as miniyaml
class File:
"""
Verbatim copy the contents of a file into config.yml
"""
def __init__(self, filename):
self.filename = filename
def write(self, output_filehandle):
with open(os.path.join("verbatim-sources", self.filename)) as fh:
shutil.copyfileobj(fh, output_filehandle)
class FunctionGen(namedtuple("FunctionGen", "function depth")):
__slots__ = ()
class Treegen(FunctionGen):
"""
Insert the content of a YAML tree into config.yml
"""
def write(self, output_filehandle):
miniyaml.render(output_filehandle, self.function(), self.depth)
class Listgen(FunctionGen):
"""
Insert the content of a YAML list into config.yml
"""
def write(self, output_filehandle):
miniyaml.render(output_filehandle, self.function(), self.depth)
def horizontal_rule():
return "".join("#" * 78)
class Header:
def __init__(self, title, summary=None):
self.title = title
self.summary_lines = summary or []
def write(self, output_filehandle):
text_lines = [self.title] + self.summary_lines
comment_lines = ["# " + x for x in text_lines]
lines = miniutils.sandwich([horizontal_rule()], comment_lines)
for line in filter(None, lines):
output_filehandle.write(line + "\n")
def _for_all_items(items, functor) -> None:
if isinstance(items, list):
for item in items:
_for_all_items(item, functor)
if isinstance(items, dict) and len(items) == 1:
item_type, item = next(iter(items.items()))
functor(item_type, item)
def filter_master_only_jobs(items):
def _is_main_or_master_item(item):
filters = item.get("filters", None)
branches = filters.get("branches", None) if filters is not None else None
branches_only = branches.get("only", None) if branches is not None else None
return (
("main" in branches_only or "master" in branches_only)
if branches_only is not None
else False
)
master_deps = set()
def _save_requires_if_master(item_type, item):
requires = item.get("requires", None)
item_name = item.get("name", None)
if not isinstance(requires, list):
return
if _is_main_or_master_item(item) or item_name in master_deps:
master_deps.update([n.strip('"') for n in requires])
def _do_filtering(items):
if isinstance(items, list):
rc = [_do_filtering(item) for item in items]
return [item for item in rc if len(item if item is not None else []) > 0]
assert isinstance(items, dict) and len(items) == 1
item_type, item = next(iter(items.items()))
item_name = item.get("name", None)
item_name = item_name.strip('"') if item_name is not None else None
if not _is_main_or_master_item(item) and item_name not in master_deps:
return None
if "filters" in item:
item = item.copy()
item.pop("filters")
return {item_type: item}
# Scan the dependencies twice to pick up nested required jobs,
# i.e. jobs depending on jobs that main-only jobs depend on
_for_all_items(items, _save_requires_if_master)
_for_all_items(items, _save_requires_if_master)
return _do_filtering(items)
def generate_required_docker_images(items):
required_docker_images = set()
def _requires_docker_image(item_type, item):
requires = item.get("requires", None)
if not isinstance(requires, list):
return
for requirement in requires:
requirement = requirement.replace('"', "")
if requirement.startswith("docker-"):
required_docker_images.add(requirement)
_for_all_items(items, _requires_docker_image)
return required_docker_images
def gen_build_workflows_tree():
build_workflows_functions = [
cimodel.data.simple.mobile_definitions.get_workflow_jobs,
cimodel.data.simple.nightly_ios.get_workflow_jobs,
]
build_jobs = [f() for f in build_workflows_functions]
build_jobs.extend(
cimodel.data.simple.docker_definitions.get_workflow_jobs(
# sort for consistency
sorted(generate_required_docker_images(build_jobs))
)
)
master_build_jobs = filter_master_only_jobs(build_jobs)
rc = {
"workflows": {
"build": {
"when": r"<< pipeline.parameters.run_build >>",
"jobs": build_jobs,
},
}
}
if len(master_build_jobs) > 0:
rc["workflows"]["master_build"] = {
"when": r"<< pipeline.parameters.run_master_build >>",
"jobs": master_build_jobs,
}
return rc
# Order of this list matters to the generated config.yml.
YAML_SOURCES = [
File("header-section.yml"),
File("commands.yml"),
File("nightly-binary-build-defaults.yml"),
Header("Build parameters"),
File("build-parameters/pytorch-build-params.yml"),
File("build-parameters/binary-build-params.yml"),
Header("Job specs"),
File("job-specs/binary-job-specs.yml"),
File("job-specs/job-specs-custom.yml"),
File("job-specs/binary_update_htmls.yml"),
File("job-specs/binary-build-tests.yml"),
File("job-specs/docker_jobs.yml"),
Header("Workflows"),
Treegen(gen_build_workflows_tree, 0),
]
def stitch_sources(output_filehandle):
for f in YAML_SOURCES:
f.write(output_filehandle)
if __name__ == "__main__":
stitch_sources(sys.stdout)

5 .circleci/regenerate.ps1 Normal file
View File

@ -0,0 +1,5 @@
cd $PSScriptRoot;
$NewFile = New-TemporaryFile;
python generate_config_yml.py > $NewFile.name
(Get-Content $NewFile.name -Raw).TrimEnd().Replace("`r`n","`n") | Set-Content config.yml -Force
Remove-Item $NewFile.name

17 .circleci/regenerate.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/bash -e
# Allows this script to be invoked from any directory:
cd "$(dirname "$0")"
UNCOMMIT_CHANGE=$(git status -s | grep " config.yml" | wc -l | xargs)
if [[ $UNCOMMIT_CHANGE != 0 ]]; then
OLD_FILE=$(mktemp)
cp config.yml "$OLD_FILE"
echo "Uncommitted change detected in .circleci/config.yml"
echo "It has been backed up to $OLD_FILE"
fi
NEW_FILE=$(mktemp)
./generate_config_yml.py > "$NEW_FILE"
cp "$NEW_FILE" config.yml
echo "New config generated in .circleci/config.yml"

View File

@ -58,7 +58,8 @@ fi
PIP_UPLOAD_FOLDER='nightly/'
# We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
export DATE="$(date -u +%Y%m%d)"
BASE_BUILD_VERSION="$(cat ${PYTORCH_ROOT}/version.txt|cut -da -f1).dev${DATE}"
# TODO: We should be pulling the semver version from the base version.txt
BASE_BUILD_VERSION="2.2.0.dev$DATE"
# Change BASE_BUILD_VERSION to git tag when on a git tag
# Use 'git -C' to make doubly sure we're in the correct directory for checking
# the git tag

View File

@ -0,0 +1,65 @@
binary_linux_build_params: &binary_linux_build_params
parameters:
build_environment:
type: string
default: ""
docker_image:
type: string
default: ""
libtorch_variant:
type: string
default: ""
resource_class:
type: string
default: "2xlarge+"
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
LIBTORCH_VARIANT: << parameters.libtorch_variant >>
ANACONDA_USER: pytorch
resource_class: << parameters.resource_class >>
docker:
- image: << parameters.docker_image >>
binary_linux_test_upload_params: &binary_linux_test_upload_params
parameters:
build_environment:
type: string
default: ""
docker_image:
type: string
default: ""
libtorch_variant:
type: string
default: ""
resource_class:
type: string
default: "medium"
use_cuda_docker_runtime:
type: string
default: ""
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
DOCKER_IMAGE: << parameters.docker_image >>
USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
LIBTORCH_VARIANT: << parameters.libtorch_variant >>
resource_class: << parameters.resource_class >>
binary_mac_params: &binary_mac_params
parameters:
build_environment:
type: string
default: ""
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
binary_windows_params: &binary_windows_params
parameters:
build_environment:
type: string
default: ""
executor:
type: string
default: "windows-xlarge-cpu-with-nvidia-cuda"
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
JOB_EXECUTOR: <<parameters.executor>>

View File

@ -0,0 +1,105 @@
pytorch_params: &pytorch_params
parameters:
build_environment:
type: string
default: ""
docker_image:
type: string
default: ""
resource_class:
type: string
default: "large"
use_cuda_docker_runtime:
type: string
default: ""
build_only:
type: string
default: ""
ci_master:
type: string
default: ""
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
DOCKER_IMAGE: << parameters.docker_image >>
USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
BUILD_ONLY: << parameters.build_only >>
CI_MASTER: << pipeline.parameters.run_master_build >>
resource_class: << parameters.resource_class >>
pytorch_ios_params: &pytorch_ios_params
parameters:
build_environment:
type: string
default: ""
ios_arch:
type: string
default: ""
ios_platform:
type: string
default: ""
op_list:
type: string
default: ""
use_metal:
type: string
default: "0"
lite_interpreter:
type: string
default: "1"
use_coreml:
type: string
default: "0"
environment:
BUILD_ENVIRONMENT: << parameters.build_environment >>
IOS_ARCH: << parameters.ios_arch >>
IOS_PLATFORM: << parameters.ios_platform >>
SELECTED_OP_LIST: << parameters.op_list >>
USE_PYTORCH_METAL: << parameters.use_metal >>
BUILD_LITE_INTERPRETER: << parameters.lite_interpreter >>
USE_COREML_DELEGATE: << parameters.use_coreml >>
pytorch_windows_params: &pytorch_windows_params
parameters:
executor:
type: string
default: "windows-xlarge-cpu-with-nvidia-cuda"
build_environment:
type: string
default: ""
test_name:
type: string
default: ""
cuda_version:
type: string
default: "10.1"
python_version:
type: string
default: "3.8"
vs_version:
type: string
default: "16.8.6"
vc_version:
type: string
default: "14.16"
vc_year:
type: string
default: "2019"
vc_product:
type: string
default: "BuildTools"
use_cuda:
type: string
default: ""
environment:
BUILD_ENVIRONMENT: <<parameters.build_environment>>
SCCACHE_BUCKET: "ossci-compiler-cache"
CUDA_VERSION: <<parameters.cuda_version>>
PYTHON_VERSION: <<parameters.python_version>>
VS_VERSION: <<parameters.vs_version>>
VC_VERSION: <<parameters.vc_version>>
VC_YEAR: <<parameters.vc_year>>
VC_PRODUCT: <<parameters.vc_product>>
USE_CUDA: <<parameters.use_cuda>>
TORCH_CUDA_ARCH_LIST: "5.2 7.5"
JOB_BASE_NAME: <<parameters.test_name>>
JOB_EXECUTOR: <<parameters.executor>>

View File

@ -0,0 +1,134 @@
commands:
calculate_docker_image_tag:
description: "Calculates the docker image tag"
steps:
- run:
name: "Calculate docker image hash"
command: |
DOCKER_TAG=$(git rev-parse HEAD:.ci/docker)
echo "DOCKER_TAG=${DOCKER_TAG}" >> "${BASH_ENV}"
designate_upload_channel:
description: "inserts the correct upload channel into ${BASH_ENV}"
steps:
- run:
name: adding UPLOAD_CHANNEL to BASH_ENV
command: |
our_upload_channel=nightly
# On tags upload to test instead
if [[ -n "${CIRCLE_TAG}" ]]; then
our_upload_channel=test
fi
echo "export UPLOAD_CHANNEL=${our_upload_channel}" >> ${BASH_ENV}
# This system setup script is meant to run before the CI-related scripts, e.g.,
# installing Git client, checking out code, setting up CI env, and
# building/testing.
setup_linux_system_environment:
steps:
- run:
name: Set Up System Environment
no_output_timeout: "1h"
command: .circleci/scripts/setup_linux_system_environment.sh
setup_ci_environment:
steps:
- run:
name: Set Up CI Environment After attach_workspace
no_output_timeout: "1h"
command: .circleci/scripts/setup_ci_environment.sh
brew_update:
description: "Update Homebrew and install base formulae"
steps:
- run:
name: Update Homebrew
no_output_timeout: "10m"
command: |
set -ex
# Update repositories manually.
# Running `brew update` produces a comparison between the
# current checkout and the updated checkout, which takes a
# very long time because the existing checkout is about two years old.
for path in $(find /usr/local/Homebrew -type d -name .git)
do
cd $path/..
git fetch --depth=1 origin
git reset --hard origin/master
done
export HOMEBREW_NO_AUTO_UPDATE=1
# Install expect and moreutils so that we can call `unbuffer` and `ts`.
# moreutils installs a `parallel` executable by default, which conflicts
# with the executable from the GNU `parallel`, so we must unlink GNU
# `parallel` first, and relink it afterwards.
brew unlink parallel
brew install moreutils
brew link parallel --overwrite
brew install expect
brew_install:
description: "Install Homebrew formulae"
parameters:
formulae:
type: string
default: ""
steps:
- run:
name: Install << parameters.formulae >>
no_output_timeout: "10m"
command: |
set -ex
export HOMEBREW_NO_AUTO_UPDATE=1
brew install << parameters.formulae >>
run_brew_for_macos_build:
steps:
- brew_update
- brew_install:
formulae: libomp
run_brew_for_ios_build:
steps:
- brew_update
- brew_install:
formulae: libtool
optional_merge_target_branch:
steps:
- run:
name: (Optional) Merge target branch
no_output_timeout: "10m"
command: |
if [[ -n "$CIRCLE_PULL_REQUEST" && "$CIRCLE_BRANCH" != "nightly" ]]; then
PR_NUM=$(basename $CIRCLE_PULL_REQUEST)
CIRCLE_PR_BASE_BRANCH=$(curl -s https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/$PR_NUM | jq -r '.base.ref')
if [[ "${BUILD_ENVIRONMENT}" == *"xla"* || "${BUILD_ENVIRONMENT}" == *"gcc5"* ]] ; then
set -x
git config --global user.email "circleci.ossci@gmail.com"
git config --global user.name "CircleCI"
git config remote.origin.url https://github.com/pytorch/pytorch.git
git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
# PRs generated from ghstack have the format CIRCLE_PR_BASE_BRANCH=gh/xxx/1234/base
if [[ "${CIRCLE_PR_BASE_BRANCH}" == "gh/"* ]]; then
CIRCLE_PR_BASE_BRANCH=master
fi
export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/$CIRCLE_PR_BASE_BRANCH`
echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
export GIT_COMMIT=${CIRCLE_SHA1}
echo "GIT_COMMIT: " ${GIT_COMMIT}
git checkout -f ${GIT_COMMIT}
git reset --hard ${GIT_COMMIT}
git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
echo "Merged $CIRCLE_PR_BASE_BRANCH branch before building in environment $BUILD_ENVIRONMENT"
set +x
else
echo "No need to merge with $CIRCLE_PR_BASE_BRANCH, skipping..."
fi
else
echo "This is not a pull request, skipping..."
fi


@ -0,0 +1,41 @@
# WARNING: DO NOT EDIT THIS FILE DIRECTLY!!!
# See the README.md in this directory.
# IMPORTANT: To update Docker image version, please follow
# the instructions at
# https://github.com/pytorch/pytorch/wiki/Docker-image-build-on-CircleCI
version: 2.1
parameters:
run_binary_tests:
type: boolean
default: false
run_build:
type: boolean
default: true
run_master_build:
type: boolean
default: false
run_slow_gradcheck_build:
type: boolean
default: false
executors:
windows-with-nvidia-gpu:
machine:
resource_class: windows.gpu.nvidia.medium
image: windows-server-2019-nvidia:previous
shell: bash.exe
windows-xlarge-cpu-with-nvidia-cuda:
machine:
resource_class: windows.xlarge
image: windows-server-2019-vs2019:stable
shell: bash.exe
windows-medium-cpu-with-nvidia-cuda:
machine:
resource_class: windows.medium
image: windows-server-2019-vs2019:stable
shell: bash.exe


@ -0,0 +1,14 @@
# TODO: There is currently no testing for libtorch
# binary_linux_libtorch_3.6m_cpu_test:
# environment:
# BUILD_ENVIRONMENT: "libtorch 3.6m cpu"
# resource_class: gpu.nvidia.small
# <<: *binary_linux_test
#
# binary_linux_libtorch_3.6m_cu90_test:
# environment:
# BUILD_ENVIRONMENT: "libtorch 3.6m cu90"
# resource_class: gpu.nvidia.small
# <<: *binary_linux_test
#


@ -0,0 +1,44 @@
jobs:
binary_ios_build:
<<: *pytorch_ios_params
macos:
xcode: "12.5.1"
steps:
- attach_workspace:
at: ~/workspace
- checkout
- run_brew_for_ios_build
- run:
name: Build
no_output_timeout: "1h"
command: |
script="/Users/distiller/project/.circleci/scripts/binary_ios_build.sh"
cat "$script"
source "$script"
- run:
name: Test
no_output_timeout: "30m"
command: |
script="/Users/distiller/project/.circleci/scripts/binary_ios_test.sh"
cat "$script"
source "$script"
- persist_to_workspace:
root: /Users/distiller/workspace/
paths: ios
binary_ios_upload:
<<: *pytorch_ios_params
macos:
xcode: "12.5.1"
steps:
- attach_workspace:
at: ~/workspace
- checkout
- run_brew_for_ios_build
- run:
name: Upload
no_output_timeout: "1h"
command: |
script="/Users/distiller/project/.circleci/scripts/binary_ios_upload.sh"
cat "$script"
source "$script"


@ -0,0 +1,53 @@
# update_s3_htmls job
# These jobs create an html file for every cpu/cu## folder in s3. Each html
# file simply lists the names of all the binary (.whl) files in that folder.
# This allows pip installs of the latest version in a folder without having
# to know the latest date: pip's -f flag accepts an html file listing a set
# of packages, and pip then installs the one with the most recent version.
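# As a sketch of that mechanism (the package name here is illustrative), a
# pip install against such an html index looks like:
#   pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html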
update_s3_htmls: &update_s3_htmls
machine:
image: ubuntu-2004:202104-01
resource_class: medium
steps:
- checkout
- setup_linux_system_environment
- run:
<<: *binary_checkout
# N.B. we do not run binary_populate_env. The only variable we need is
# PIP_UPLOAD_FOLDER (which is 'nightly/' for the nightlies and '' for
# releases, and sometimes other things for special cases). Instead we
# expect PIP_UPLOAD_FOLDER to be passed directly in the env. This is
# because, unlike all the other binary jobs, these jobs only get run once,
# in a separate workflow. They are not a step in other binary jobs like
# build, test, upload.
#
# You could attach this to every job, or include it in the upload step if
# you wanted. You would need to add binary_populate_env in this case to
# make sure it has the same upload folder as the job it's attached to. This
# function is idempotent, so it won't hurt anything; it's just a little
# unnecessary.
- run:
name: define PIP_UPLOAD_FOLDER
command: |
our_upload_folder=nightly/
# On tags upload to test instead
if [[ -n "${CIRCLE_TAG}" ]]; then
our_upload_folder=test/
fi
echo "export PIP_UPLOAD_FOLDER=${our_upload_folder}" >> ${BASH_ENV}
- run:
name: Update s3 htmls
no_output_timeout: "1h"
command: |
set +x
echo "declare -x \"AWS_ACCESS_KEY_ID=${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}\"" >> /home/circleci/project/env
echo "declare -x \"AWS_SECRET_ACCESS_KEY=${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}\"" >> /home/circleci/project/env
source /home/circleci/project/env
set -eux -o pipefail
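# Retry a command up to five times with exponential backoff (sleeping
# 1s/2s/4s/8s between attempts) to ride out transient network failures.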
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
retry pip install awscli==1.6
"/home/circleci/project/builder/cron/update_s3_htmls.sh"


@ -0,0 +1,56 @@
docker_build_job:
parameters:
image_name:
type: string
default: ""
machine:
image: ubuntu-2004:202104-01
resource_class: large
environment:
IMAGE_NAME: << parameters.image_name >>
# Enable 'docker manifest'
DOCKER_CLI_EXPERIMENTAL: "enabled"
DOCKER_BUILDKIT: 1
steps:
- checkout
- calculate_docker_image_tag
- run:
name: Check if image should be built
command: |
set +x
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
export AWS_REGION=us-east-1
aws ecr get-login-password --region $AWS_REGION|docker login --username AWS \
--password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
set -x
# Check if image already exists, if it does then skip building it
if docker manifest inspect "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${IMAGE_NAME}:${DOCKER_TAG}"; then
circleci-agent step halt
# circleci-agent step halt doesn't actually halt the step so we need to
# explicitly exit the step here ourselves before it causes too much trouble
exit 0
fi
# Covers the case where a previous tag doesn't exist for the tree;
# this is only really applicable to trees that don't have `.ci/docker` at their merge base, i.e. nightly
if ! git rev-parse "$(git merge-base HEAD << pipeline.git.base_revision >>):.ci/docker"; then
echo "Directory '.ci/docker' not found in tree << pipeline.git.base_revision >>, you should probably rebase onto a more recent commit"
exit 1
fi
PREVIOUS_DOCKER_TAG=$(git rev-parse "$(git merge-base HEAD << pipeline.git.base_revision >>):.ci/docker")
# If no image exists but the hash is the same as the previous hash then we should error out here
if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
echo " contact the PyTorch team to restore the original images"
exit 1
fi
- run:
name: build_docker_image_<< parameters.image_name >>
no_output_timeout: "1h"
command: |
set +x
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
set -x
cd .ci/docker && ./build_docker.sh


@ -0,0 +1,747 @@
pytorch_doc_push:
resource_class: medium
machine:
image: ubuntu-2004:202104-01
parameters:
branch:
type: string
default: "main"
steps:
- attach_workspace:
at: /tmp/workspace
- run:
name: Generate netrc
command: |
# set credentials for https pushing
cat > ~/.netrc \<<DONE
machine github.com
login pytorchbot
password ${GITHUB_PYTORCHBOT_TOKEN}
DONE
- run:
name: Docs push
command: |
pushd /tmp/workspace
git push -u origin "<< parameters.branch >>"
pytorch_macos_10_15_py3_build:
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.15-py3-arm64-build
macos:
xcode: "12.3.0"
steps:
- checkout
- run_brew_for_macos_build
- run:
name: Build
no_output_timeout: "1h"
command: |
set -e
export CROSS_COMPILE_ARM64=1
export JOB_BASE_NAME=$CIRCLE_JOB
# Install sccache
sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
sudo chmod +x /usr/local/bin/sccache
export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
# This IAM user allows write access to S3 bucket for sccache
set +x
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
set -x
chmod a+x .ci/pytorch/macos-build.sh
unbuffer .ci/pytorch/macos-build.sh 2>&1 | ts
- persist_to_workspace:
root: /Users/distiller/workspace/
paths:
- miniconda3
- store_artifacts:
path: /Users/distiller/project/dist
pytorch_macos_10_13_py3_build:
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-build
macos:
xcode: "12.0"
steps:
- checkout
- run_brew_for_macos_build
- run:
name: Build
no_output_timeout: "1h"
command: |
set -e
export JOB_BASE_NAME=$CIRCLE_JOB
# Install sccache
sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
sudo chmod +x /usr/local/bin/sccache
export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
# This IAM user allows write access to S3 bucket for sccache
set +x
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
set -x
chmod a+x .ci/pytorch/macos-build.sh
unbuffer .ci/pytorch/macos-build.sh 2>&1 | ts
- persist_to_workspace:
root: /Users/distiller/workspace/
paths:
- miniconda3
mac_build:
parameters:
build-environment:
type: string
description: Top-level label for what's being built/tested.
xcode-version:
type: string
default: "13.3.1"
description: What xcode version to build with.
build-generates-artifacts:
type: boolean
default: true
description: if the build generates build artifacts
python-version:
type: string
default: "3.8"
macos:
xcode: << parameters.xcode-version >>
resource_class: medium
environment:
BUILD_ENVIRONMENT: << parameters.build-environment >>
AWS_REGION: us-east-1
steps:
- checkout
- run_brew_for_macos_build
- run:
name: Install sccache
command: |
sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
sudo chmod +x /usr/local/bin/sccache
echo "export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${BASH_ENV}"
echo "export SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${BASH_ENV}"
set +x
echo "export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}" >> "${BASH_ENV}"
echo "export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}" >> "${BASH_ENV}"
set -x
- run:
name: Get workflow job id
command: |
echo "export OUR_GITHUB_JOB_ID=${CIRCLE_WORKFLOW_JOB_ID}" >> "${BASH_ENV}"
- run:
name: Build
command: |
set -x
git submodule sync
git submodule update --init --recursive --depth 1 --jobs 0
export PATH="/usr/local/bin:$PATH"
export WORKSPACE_DIR="${HOME}/workspace"
mkdir -p "${WORKSPACE_DIR}"
MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py38_4.12.0-MacOSX-x86_64.sh"
if [ << parameters.python-version >> == 3.9.12 ]; then
MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-x86_64.sh"
fi
# If a local installation of conda doesn't exist, we download and install conda
if [ ! -d "${WORKSPACE_DIR}/miniconda3" ]; then
mkdir -p "${WORKSPACE_DIR}"
curl --retry 3 ${MINICONDA_URL} -o "${WORKSPACE_DIR}"/miniconda3.sh
bash "${WORKSPACE_DIR}"/miniconda3.sh -b -p "${WORKSPACE_DIR}"/miniconda3
fi
export PATH="${WORKSPACE_DIR}/miniconda3/bin:$PATH"
# shellcheck disable=SC1091
source "${WORKSPACE_DIR}"/miniconda3/bin/activate
brew link --force libomp
echo "export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}" >> "${BASH_ENV}"
.ci/pytorch/macos-build.sh
- when:
condition: << parameters.build-generates-artifacts >>
steps:
- run:
name: Archive artifacts into zip
command: |
zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
cp artifacts.zip /Users/distiller/workspace
- persist_to_workspace:
root: /Users/distiller/workspace/
paths:
- miniconda3
- artifacts.zip
- store_artifacts:
path: /Users/distiller/project/artifacts.zip
mac_test:
parameters:
build-environment:
type: string
shard-number:
type: string
num-test-shards:
type: string
xcode-version:
type: string
test-config:
type: string
default: 'default'
macos:
xcode: << parameters.xcode-version >>
environment:
GIT_DEFAULT_BRANCH: 'master'
BUILD_ENVIRONMENT: << parameters.build-environment >>
TEST_CONFIG: << parameters.test-config >>
SHARD_NUMBER: << parameters.shard-number >>
NUM_TEST_SHARDS: << parameters.num-test-shards >>
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
steps:
- checkout
- attach_workspace:
at: ~/workspace
- run_brew_for_macos_build
- run:
name: Test
no_output_timeout: "2h"
command: |
set -x
git submodule sync --recursive
git submodule update --init --recursive
mv ~/workspace/artifacts.zip .
unzip artifacts.zip
export IN_CI=1
COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
export PATH="/usr/local/bin:$PATH"
export WORKSPACE_DIR="${HOME}/workspace"
mkdir -p "${WORKSPACE_DIR}"
export PATH="${WORKSPACE_DIR}/miniconda3/bin:$PATH"
source "${WORKSPACE_DIR}"/miniconda3/bin/activate
# sanitize the input commit message and PR body here:
# trim all new lines from commit messages to avoid issues with batch environment
# variable copying. see https://github.com/pytorch/pytorch/pull/80043#issuecomment-1167796028
COMMIT_MESSAGES="${COMMIT_MESSAGES//[$'\n\r']}"
# then trim all special characters like single and double quotes to avoid letting
# unescaped inputs wreak havoc internally
export COMMIT_MESSAGES="${COMMIT_MESSAGES//[\'\"]}"
python3 -mpip install dist/*.whl
.ci/pytorch/macos-test.sh
- run:
name: Copy files for uploading test stats
command: |
# copy into a parent folder test-reports because we can't use CIRCLE_BUILD_NUM in the path when persisting to the workspace
mkdir -p test-reports/test-reports_${CIRCLE_BUILD_NUM}/test/test-reports
cp -r test/test-reports test-reports/test-reports_${CIRCLE_BUILD_NUM}/test/test-reports
- store_test_results:
path: test/test-reports
- persist_to_workspace:
root: /Users/distiller/project/
paths:
- test-reports
upload_test_stats:
machine: # executor type
image: ubuntu-2004:202010-01 # recommended Linux image - includes Ubuntu 20.04, docker 19.03.13, docker-compose 1.27.4
steps:
- checkout
- attach_workspace:
at: ~/workspace
- run:
name: upload
command: |
set -ex
if [ -z ${AWS_ACCESS_KEY_FOR_OSSCI_ARTIFACT_UPLOAD} ]; then
echo "No credentials found, cannot upload test stats (are you on a fork?)"
exit 0
fi
cp -r ~/workspace/test-reports/* ~/project
pip3 install requests==2.26 rockset==1.0.3 boto3==1.19.12
export AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_FOR_OSSCI_ARTIFACT_UPLOAD}
export AWS_SECRET_ACCESS_KEY=${AWS_SECRET_KEY_FOR_OSSCI_ARTIFACT_UPLOAD}
# We don't know how to get the run attempt number for reruns, so default to 1
python3 -m tools.stats.upload_test_stats --workflow-run-id "${CIRCLE_WORKFLOW_JOB_ID}" --workflow-run-attempt 1 --head-branch << pipeline.git.branch >> --circleci
pytorch_macos_10_13_py3_test:
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-test
macos:
xcode: "12.0"
steps:
- checkout
- attach_workspace:
at: ~/workspace
- run_brew_for_macos_build
- run:
name: Test
no_output_timeout: "1h"
command: |
set -e
export JOB_BASE_NAME=$CIRCLE_JOB
chmod a+x .ci/pytorch/macos-test.sh
unbuffer .ci/pytorch/macos-test.sh 2>&1 | ts
- store_test_results:
path: test/test-reports
pytorch_macos_10_13_py3_lite_interpreter_build_test:
environment:
BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-test
macos:
xcode: "12.0"
steps:
- checkout
- attach_workspace:
at: ~/workspace
- run_brew_for_macos_build
- run:
name: Test
no_output_timeout: "1h"
command: |
set -e
export BUILD_LITE_INTERPRETER=1
export JOB_BASE_NAME=$CIRCLE_JOB
chmod a+x ${HOME}/project/.ci/pytorch/macos-lite-interpreter-build-test.sh
unbuffer ${HOME}/project/.ci/pytorch/macos-lite-interpreter-build-test.sh 2>&1 | ts
- store_test_results:
path: test/test-reports
pytorch_android_gradle_build:
environment:
BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build
DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
PYTHON_VERSION: "3.7"
resource_class: large
machine:
image: ubuntu-2004:202104-01
steps:
- checkout
- calculate_docker_image_tag
- setup_linux_system_environment
- setup_ci_environment
- run:
name: pytorch android gradle build
no_output_timeout: "1h"
command: |
set -eux
docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
docker_image_libtorch_android_x86_32=${docker_image_commit}-android-x86_32
docker_image_libtorch_android_x86_64=${docker_image_commit}-android-x86_64
docker_image_libtorch_android_arm_v7a=${docker_image_commit}-android-arm-v7a
docker_image_libtorch_android_arm_v8a=${docker_image_commit}-android-arm-v8a
echo "docker_image_commit: "${docker_image_commit}
echo "docker_image_libtorch_android_x86_32: "${docker_image_libtorch_android_x86_32}
echo "docker_image_libtorch_android_x86_64: "${docker_image_libtorch_android_x86_64}
echo "docker_image_libtorch_android_arm_v7a: "${docker_image_libtorch_android_arm_v7a}
echo "docker_image_libtorch_android_arm_v8a: "${docker_image_libtorch_android_arm_v8a}
# x86_32
time docker pull ${docker_image_libtorch_android_x86_32} >/dev/null
export id_x86_32=$(docker run --env-file "${BASH_ENV}" -e GRADLE_OFFLINE=1 --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_32})
export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_x86_32" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
# arm-v7a
time docker pull ${docker_image_libtorch_android_arm_v7a} >/dev/null
export id_arm_v7a=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_arm_v7a})
export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_arm_v7a" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
mkdir -p ~/workspace/build_android_install_arm_v7a
docker cp $id_arm_v7a:/var/lib/jenkins/workspace/build_android/install ~/workspace/build_android_install_arm_v7a
# x86_64
time docker pull ${docker_image_libtorch_android_x86_64} >/dev/null
export id_x86_64=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_64})
export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_x86_64" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
mkdir -p ~/workspace/build_android_install_x86_64
docker cp $id_x86_64:/var/lib/jenkins/workspace/build_android/install ~/workspace/build_android_install_x86_64
# arm-v8a
time docker pull ${docker_image_libtorch_android_arm_v8a} >/dev/null
export id_arm_v8a=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_arm_v8a})
export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_arm_v8a" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
mkdir -p ~/workspace/build_android_install_arm_v8a
docker cp $id_arm_v8a:/var/lib/jenkins/workspace/build_android/install ~/workspace/build_android_install_arm_v8a
docker cp ~/workspace/build_android_install_arm_v7a $id_x86_32:/var/lib/jenkins/workspace/build_android_install_arm_v7a
docker cp ~/workspace/build_android_install_x86_64 $id_x86_32:/var/lib/jenkins/workspace/build_android_install_x86_64
docker cp ~/workspace/build_android_install_arm_v8a $id_x86_32:/var/lib/jenkins/workspace/build_android_install_arm_v8a
# run gradle buildRelease
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id_x86_32" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
mkdir -p ~/workspace/build_android_artifacts
docker cp $id_x86_32:/var/lib/jenkins/workspace/android/artifacts.tgz ~/workspace/build_android_artifacts/
output_image=$docker_image_libtorch_android_x86_32-gradle
docker commit "$id_x86_32" ${output_image}
time docker push ${output_image}
- store_artifacts:
path: ~/workspace/build_android_artifacts/artifacts.tgz
destination: artifacts.tgz
pytorch_android_publish_snapshot:
environment:
BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-publish-snapshot
DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
PYTHON_VERSION: "3.7"
resource_class: large
machine:
image: ubuntu-2004:202104-01
steps:
- checkout
- calculate_docker_image_tag
- setup_linux_system_environment
- setup_ci_environment
- run:
name: pytorch android gradle build
no_output_timeout: "1h"
command: |
set -eux
docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
docker_image_libtorch_android_x86_32_gradle=${docker_image_commit}-android-x86_32-gradle
echo "docker_image_commit: "${docker_image_commit}
echo "docker_image_libtorch_android_x86_32_gradle: "${docker_image_libtorch_android_x86_32_gradle}
# x86_32
time docker pull ${docker_image_libtorch_android_x86_32_gradle} >/dev/null
export id_x86_32=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_32_gradle})
export COMMAND='((echo "sudo chown -R jenkins workspace" && echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export SONATYPE_NEXUS_USERNAME=${SONATYPE_NEXUS_USERNAME}" && echo "export SONATYPE_NEXUS_PASSWORD=${SONATYPE_NEXUS_PASSWORD}" && echo "export ANDROID_SIGN_KEY=${ANDROID_SIGN_KEY}" && echo "export ANDROID_SIGN_PASS=${ANDROID_SIGN_PASS}" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/publish_android_snapshot.sh") | docker exec -u jenkins -i "$id_x86_32" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
output_image=${docker_image_libtorch_android_x86_32_gradle}-publish-snapshot
docker commit "$id_x86_32" ${output_image}
time docker push ${output_image}
pytorch_android_gradle_build-x86_32:
environment:
BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-only-x86_32
DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
PYTHON_VERSION: "3.7"
resource_class: large
machine:
image: ubuntu-2004:202104-01
steps:
- checkout
- calculate_docker_image_tag
- setup_linux_system_environment
- checkout
- setup_ci_environment
- run:
name: pytorch android gradle build only x86_32 (for PR)
no_output_timeout: "1h"
command: |
set -e
docker_image_libtorch_android_x86_32=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}-android-x86_32
echo "docker_image_libtorch_android_x86_32: "${docker_image_libtorch_android_x86_32}
# x86
time docker pull ${docker_image_libtorch_android_x86_32} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_32})
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export GRADLE_OFFLINE=1" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
mkdir -p ~/workspace/build_android_x86_32_artifacts
docker cp $id:/var/lib/jenkins/workspace/android/artifacts.tgz ~/workspace/build_android_x86_32_artifacts/
output_image=${docker_image_libtorch_android_x86_32}-gradle
docker commit "$id" ${output_image}
time docker push ${output_image}
- store_artifacts:
path: ~/workspace/build_android_x86_32_artifacts/artifacts.tgz
destination: artifacts.tgz
pytorch_ios_build:
<<: *pytorch_ios_params
macos:
xcode: "12.5.1"
steps:
- run:
name: checkout with retry
command: |
checkout() {
set -ex
# Workaround old docker images with incorrect $HOME
# check https://github.com/docker/docker/issues/2968 for details
if [ "${HOME}" = "/" ]
then
export HOME=$(getent passwd $(id -un) | cut -d: -f6)
fi
mkdir -p ~/.ssh
echo 'github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==
' >> ~/.ssh/known_hosts
# use git+ssh instead of https
git config --global url."ssh://git@github.com".insteadOf "https://github.com" || true
git config --global gc.auto 0 || true
echo 'Cloning git repository'
mkdir -p '/Users/distiller/project'
cd '/Users/distiller/project'
git clone "$CIRCLE_REPOSITORY_URL" .
echo 'Checking out branch'
git checkout --force -B "$CIRCLE_BRANCH" "$CIRCLE_SHA1"
git --no-pager log --no-color -n 1 --format='HEAD is now at %h %s'
}
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
retry checkout
- run_brew_for_ios_build
- run:
name: Setup Fastlane
no_output_timeout: "1h"
command: |
set -e
PROJ_ROOT=/Users/distiller/project
cd ${PROJ_ROOT}/ios/TestApp
# install fastlane
sudo gem install bundler && bundle install
- run:
name: Build
no_output_timeout: "1h"
command: |
set -e
WORKSPACE=/Users/distiller/workspace
PROJ_ROOT=/Users/distiller/project
export TCLLIBPATH="/usr/local/lib"
# Install conda
curl --retry 3 -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-x86_64.sh
chmod +x ~/conda.sh
/bin/bash ~/conda.sh -b -p ~/anaconda
export PATH="~/anaconda/bin:${PATH}"
source ~/anaconda/bin/activate
# Install dependencies
retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
retry conda install numpy ninja pyyaml mkl mkl-include setuptools cmake requests typing-extensions --yes
# sync submodules
cd ${PROJ_ROOT}
git submodule sync
git submodule update --init --recursive --depth 1 --jobs 0
# export
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
# run build script
chmod a+x ${PROJ_ROOT}/scripts/build_ios.sh
echo "IOS_ARCH: ${IOS_ARCH}"
echo "IOS_PLATFORM: ${IOS_PLATFORM}"
echo "USE_PYTORCH_METAL": "${USE_METAL}"
echo "BUILD_LITE_INTERPRETER": "${BUILD_LITE_INTERPRETER}"
echo "USE_COREML_DELEGATE": "${USE_COREML_DELEGATE}"
# check the custom build flag
echo "SELECTED_OP_LIST: ${SELECTED_OP_LIST}"
if [ -n "${SELECTED_OP_LIST}" ]; then
export SELECTED_OP_LIST="${PROJ_ROOT}/ios/TestApp/custom_build/${SELECTED_OP_LIST}"
fi
export IOS_ARCH=${IOS_ARCH}
export IOS_PLATFORM=${IOS_PLATFORM}
export USE_COREML_DELEGATE=${USE_COREML_DELEGATE}
if [ ${IOS_PLATFORM} != "SIMULATOR" ]; then
export USE_PYTORCH_METAL=${USE_METAL}
fi
unbuffer ${PROJ_ROOT}/scripts/build_ios.sh 2>&1 | ts
- run:
name: Run Build Test
no_output_timeout: "30m"
command: |
set -e
PROJ_ROOT=/Users/distiller/project
# run the ruby build script
if ! [ -x "$(command -v xcodebuild)" ]; then
echo 'Error: xcodebuild is not installed.'
exit 1
fi
ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM}
if ! [ "$?" -eq "0" ]; then
echo 'xcodebuild failed!'
exit 1
fi
- run:
name: Run Simulator Tests
no_output_timeout: "2h"
command: |
set -e
if [ ${IOS_PLATFORM} != "SIMULATOR" ]; then
echo "not SIMULATOR build, skip it."
exit 0
fi
WORKSPACE=/Users/distiller/workspace
PROJ_ROOT=/Users/distiller/project
source ~/anaconda/bin/activate
# use the pytorch nightly build to generate models
pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
# generate models for different backends
cd ${PROJ_ROOT}/ios/TestApp/benchmark
mkdir -p ../models
if [ ${USE_COREML_DELEGATE} == 1 ]; then
pip install coremltools==5.0b5 protobuf==3.20.1
python coreml_backend.py
else
cd "${PROJ_ROOT}"
python test/mobile/model_test/gen_test_model.py ios-test
fi
cd "${PROJ_ROOT}/ios/TestApp/benchmark"
if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
echo "Setting up the TestApp for LiteInterpreter"
ruby setup.rb --lite 1
else
echo "Setting up the TestApp for Full JIT"
ruby setup.rb
fi
cd "${PROJ_ROOT}/ios/TestApp"
# instruments -s -devices
if [ "${BUILD_LITE_INTERPRETER}" == 1 ]; then
if [ "${USE_COREML_DELEGATE}" == 1 ]; then
fastlane scan --only_testing TestAppTests/TestAppTests/testCoreML
else
fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
fi
else
fastlane scan --only_testing TestAppTests/TestAppTests/testFullJIT
fi
pytorch_linux_bazel_build:
<<: *pytorch_params
machine:
image: ubuntu-2004:202104-01
steps:
- checkout
- calculate_docker_image_tag
- setup_linux_system_environment
- setup_ci_environment
- run:
name: Bazel Build
no_output_timeout: "1h"
command: |
set -e
# Pull Docker image and run build
echo "DOCKER_IMAGE: "${DOCKER_IMAGE}:${DOCKER_TAG}
time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
echo "Do NOT merge main branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .ci/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
# Push intermediate Docker image for next phase to use
if [ -z "${BUILD_ONLY}" ]; then
# Augment our output image name with bazel to avoid collisions
output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
export COMMIT_DOCKER_IMAGE=$output_image
docker commit "$id" ${COMMIT_DOCKER_IMAGE}
time docker push ${COMMIT_DOCKER_IMAGE}
fi
pytorch_linux_bazel_test:
<<: *pytorch_params
machine:
image: ubuntu-2004:202104-01
steps:
- checkout
- calculate_docker_image_tag
- setup_linux_system_environment
- setup_ci_environment
- run:
name: Test
no_output_timeout: "90m"
command: |
set -e
output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
export COMMIT_DOCKER_IMAGE=$output_image
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
else
export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
fi
retrieve_test_reports() {
echo "retrieving test reports"
docker cp -L $id:/var/lib/jenkins/workspace/bazel-testlogs ./ || echo 'No test reports found!'
}
trap "retrieve_test_reports" ERR
if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .ci/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
else
export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .ci/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
fi
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
retrieve_test_reports
docker stats --all --no-stream
- store_test_results:
path: bazel-testlogs
pytorch_windows_test_multigpu:
machine:
image: ubuntu-2004:202104-01
steps:
- checkout
- run:
name: Test
no_output_timeout: "90m"
command: |
set -e
python3 -m pip install requests
python3 ./.circleci/scripts/trigger_azure_pipeline.py


@ -0,0 +1,18 @@
promote_s3:
<<: *promote_common
steps:
- checkout
- run:
name: Running promote script
command: |
scripts/release/promote/wheel_to_s3.sh
promote_conda:
<<: *promote_common
steps:
- checkout
- run:
name: Running promote script
command: |
scripts/release/promote/conda_to_conda.sh


@ -0,0 +1,29 @@
setup:
docker:
- image: circleci/python:3.7.3
steps:
- checkout
- run:
name: Save commit message
command: git log --format='%B' -n 1 HEAD > .circleci/scripts/COMMIT_MSG
# Note [Workspace for CircleCI scripts]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# In the beginning, you wrote your CI scripts in a
# .circleci/config.yml file, and life was good. Your CI
# configurations flourished and multiplied.
#
# Then one day, CircleCI cometh down high and say, "Your YAML file
# is too biggeth, it stresses our servers so." And thus they
# asketh us to smite the scripts in the yml file.
#
# But you can't just put the scripts in the .circleci folder,
# because in some jobs, you don't ever actually checkout the
# source repository. Where you gonna get the scripts from?
#
# Here's how you do it: you persist .circleci/scripts into a
# workspace, attach the workspace in your subjobs, and run all
# your scripts from there.
- persist_to_workspace:
root: .
paths: .circleci/scripts
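# The other half of the pattern, as a minimal sketch (the job name and mount
# point are illustrative, not taken from this config): a downstream job
# attaches the workspace and runs the persisted scripts from there.
#   smoke_test:
#     steps:
#       - attach_workspace:
#           at: /tmp/workspace
#       - run:
#           command: /tmp/workspace/.circleci/scripts/binary_populate_env.sh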


@ -0,0 +1,51 @@
##############################################################################
# Binary build (nightlies nightly build) defaults
# The binary builds use the docker executor b/c at time of writing the machine
# executor is limited to only two cores and is painfully slow (4.5+ hours per
# GPU build). But the docker executor cannot be run with --runtime=nvidia, and
# so the binary test/upload jobs must run on a machine executor. The package
# built in the build job is persisted to the workspace, which the test jobs
# expect. The test jobs just run a few quick smoke tests (very similar to the
# second-round-user-facing smoke tests above) and then upload the binaries to
# their final locations. The upload part requires credentials that should only
# be available to org-members.
#
# binary_checkout MUST be run before other commands here. This is because the
# other commands are written in .circleci/scripts/*.sh , so the pytorch source
# code must be downloaded on the machine before they can be run. We cannot
# inline all the code into this file, since that would cause the yaml size to
# explode past 4 MB (all the code in the command section is just copy-pasted to
# everywhere in the .circleci/config.yml file where it appears).
##############################################################################
# Checks out the Pytorch and Builder repos (always both of them), and places
# them in the right place depending on what executor we're running on. We curl
# our .sh file from the interweb to avoid yaml size bloat. Note that many jobs
# do not need both the pytorch and builder repos, so this is a little wasteful
# (smoke tests and upload jobs do not need the pytorch repo).
binary_checkout: &binary_checkout
name: Checkout pytorch/builder repo
no_output_timeout: "30m"
command: .circleci/scripts/binary_checkout.sh
# Parses circleci arguments in a consistent way, essentially routing to the
# correct pythonXgccXcudaXos build we want
binary_populate_env: &binary_populate_env
name: Set up binary env variables
command: .circleci/scripts/binary_populate_env.sh
binary_install_miniconda: &binary_install_miniconda
name: Install miniconda
no_output_timeout: "1h"
command: .circleci/scripts/binary_install_miniconda.sh
# This section is used in the binary_test and smoke_test jobs. It expects
# 'binary_populate_env' to have populated /home/circleci/project/env and it
# expects another section to populate /home/circleci/project/ci_test_script.sh
# with the code to run in the docker
binary_run_in_docker: &binary_run_in_docker
name: Run in docker
# This step only runs on circleci linux machine executors that themselves
# need to start docker images
command: .circleci/scripts/binary_run_in_docker.sh
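# A minimal sketch of how these anchors are consumed by a job (the job name
# is illustrative; per the note above, binary_checkout must come first):
#   binary_linux_test:
#     steps:
#       - run:
#           <<: *binary_checkout
#       - run:
#           <<: *binary_populate_env
#       - run:
#           <<: *binary_run_in_docker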


@ -0,0 +1,8 @@
#- binary_linux_libtorch_3.6m_cpu_test:
# requires:
# - binary_linux_libtorch_3.6m_cpu_build
#- binary_linux_libtorch_3.6m_cu90_test:
# requires:
# - binary_linux_libtorch_3.6m_cu90_build
# Nightly uploads


@ -52,13 +52,6 @@ modernize-*,
-modernize-use-nodiscard,
performance-*,
readability-container-size-empty,
readability-delete-null-pointer,
readability-duplicate-include,
readability-misplaced-array-index,
readability-redundant-function-ptr-dereference,
readability-redundant-smartptr-get,
readability-simplify-subscript-expr,
readability-string-compare,
'
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
AnalyzeTemporaryDtors: false


@ -7,7 +7,9 @@ max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
ignore =
E203,E305,E402,E501,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,
E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
# fix these lints in the future
E275,
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
EXE001,
@ -29,8 +31,6 @@ ignore =
TOR102,
per-file-ignores =
__init__.py: F401
test/**: F821
test/**/__init__.py: F401,F821
torch/utils/cpp_extension.py: B950
torchgen/api/types/__init__.py: F401,F403
torchgen/executorch/api/types/__init__.py: F401,F403


@ -38,5 +38,3 @@ f70844bec783bfce43c950ccf180dc494e86f2bf
e6ec0efaf87703c5f889cfc20b29be455885d58d
# 2023-07-31 [optim][BE] split test file into logical parts: SWA, LR, optim
a53cda1ddc15336dc1ff0ce1eff2a49cdc5f882e
# 2024-01-02 clangformat: fused adam #116583
9dc68d1aa9e554d09344a10fff69f7b50b2d23a0


@ -3,7 +3,6 @@ self-hosted-runner:
- linux.20_04.4x
- linux.20_04.16x
- linux.large
- linux.large.arc
- linux.2xlarge
- linux.4xlarge
- linux.12xlarge


@ -1,67 +0,0 @@
name: Setup XPU host
description: Set up XPU host for CI
runs:
using: composite
steps:
- name: Clean all stopped docker containers
if: always()
shell: bash
run: |
# Prune all stopped containers.
# If other runner is pruning on this node, will skip.
nprune=$(ps -ef | grep -c "docker container prune")
if [[ $nprune -eq 1 ]]; then
docker container prune -f
fi
- name: Runner health check system info
if: always()
shell: bash
run: |
cat /etc/os-release || true
cat /etc/apt/sources.list.d/oneAPI.list || true
cat /etc/apt/sources.list.d/intel-gpu-jammy.list || true
whoami
- name: Runner health check xpu-smi
if: always()
shell: bash
run: |
xpu-smi discovery
- name: Runner health check GPU count
if: always()
shell: bash
run: |
ngpu=$(xpu-smi discovery | grep -c -E 'Device Name')
msg="Please file an issue on pytorch/pytorch reporting the faulty runner. Include a link to the runner logs so the runner can be identified"
if [[ $ngpu -eq 0 ]]; then
echo "Error: Failed to detect any GPUs on the runner"
echo "$msg"
exit 1
fi
- name: Runner diskspace health check
uses: ./.github/actions/diskspace-cleanup
if: always()
- name: Runner health check disconnect on failure
if: ${{ failure() }}
shell: bash
run: |
killall runsvc.sh
- name: Preserve github env variables for use in docker
shell: bash
run: |
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
- name: XPU set GPU_FLAG
shell: bash
run: |
# Add render group for container creation.
render_gid=`cat /etc/group | grep render | cut -d: -f3`
echo "GPU_FLAG=--device=/dev/mem --device=/dev/dri --group-add video --group-add $render_gid" >> "${GITHUB_ENV}"


@ -1,20 +0,0 @@
name: Teardown XPU host
description: Tear down XPU host for CI
runs:
using: composite
steps:
- name: Teardown XPU
if: always()
shell: bash
run: |
# Prune all stopped containers.
# If other runner is pruning on this node, will skip.
nprune=$(ps -ef | grep -c "docker container prune")
if [[ $nprune -eq 1 ]]; then
docker container prune -f
fi
- name: Runner diskspace health check
uses: ./.github/actions/diskspace-cleanup
if: always()


@ -12,6 +12,7 @@ reviewers:
symbolic-shapes:
- symbolic-shapes
- antoniojkim
- wconstab
- SherlockNoMad
Chillee:
- ezyang


@ -1 +1 @@
e3efbc2d9094685dd2d4ae143853941f82f167af
6518fa9b2c74e84d7eb1fc6e3eb51e43213f0c05


@ -1 +1 @@
d23430765b5df76cd1267f438f129f51b7d6e3e1
c1e2095c3a16fbe7db25b9e2f206025488c2c203


@ -1 +1 @@
e1c94dfa5a74331a376537c23bf74a2c367f24bd
r2.2

.github/labeler.yml

@ -8,6 +8,10 @@
- torch/_inductor/**
- test/inductor/**
"module: export":
- torch/_export/**
- test/export/**
"ciflow/inductor":
- torch/_decomp/**
- torch/_dynamo/**
@ -19,9 +23,8 @@
- torch/_subclasses/meta_utils.py
- test/distributed/test_dynamo_distributed.py
- test/distributed/test_inductor_collectives.py
- torch/_functorch/_aot_autograd/**
- torch/_functorch/aot_autograd.py
- torch/_functorch/partitioners.py
- torch/_functorch/aot_autograd.py
- .ci/docker/ci_commit_pins/**
- .github/ci_commit_pins/**
- c10/core/Sym*
@ -69,13 +72,9 @@
"ciflow/trunk":
- .ci/docker/ci_commit_pins/triton.txt
"oncall: distributed":
"module: distributed":
- torch/csrc/distributed/**
- torch/distributed/**
- torch/nn/parallel/**
- test/distributed/**
- torch/testing/_internal/distributed/**
"module: distributed_checkpoint":
- torch/distributed/checkpoint/**
- test/distributed/checkpoint/**


@ -285,7 +285,6 @@
- yhcharles
- kiukchung
- d4l3k
- shuqiangzhang
mandatory_checks_name:
- EasyCLA
- Lint
@ -352,22 +351,16 @@
- Lint
- pull
- name: CPU inductor
- name: x86 CPU quantization
patterns:
- torch/_inductor/fx_passes/mkldnn_fusion.py
- torch/_inductor/fx_passes/quantization.py
- torch/_inductor/codegen/cpp.py
- test/inductor/test_mkldnn_pattern_matcher.py
- test/inductor/test_cpu_repo.py
- test/inductor/test_cpu_cpp_wrapper.py
- aten/src/ATen/native/quantized/cpu/**
- test/quantization/core/test_quantized_op.py
- torch/ao/quantization/quantizer/x86_inductor_quantizer.py
- torch/_inductor/fx_passes/quantization.py
- test/quantization/core/test_quantized_op.py
- test/inductor/test_mkldnn_pattern_matcher.py
- test/quantization/pt2e/test_x86inductor_quantizer.py
approved_by:
- leslie-fang-intel
- jgong5
- EikanWang
mandatory_checks_name:
- EasyCLA
- Lint


@ -14,7 +14,6 @@ ciflow_push_tags:
- ciflow/slow
- ciflow/trunk
- ciflow/unstable
- ciflow/xpu
retryable_workflows:
- lint
- pull


@ -10,9 +10,6 @@ from typing import Optional
SCRIPT_DIR = Path(__file__).parent
REPO_DIR = SCRIPT_DIR.parent.parent
# TODO: Remove me once Triton version is again in sync for vanilla and ROCm
ROCM_TRITION_VERSION = "2.1.0"
def read_triton_pin(rocm_hash: bool = False) -> str:
triton_file = "triton.txt" if not rocm_hash else "triton-rocm.txt"
@ -32,37 +29,25 @@ def check_and_replace(inp: str, src: str, dst: str) -> str:
return inp.replace(src, dst)
def patch_setup_py(
path: Path,
*,
version: str,
name: str = "triton",
expected_version: Optional[str] = None,
) -> None:
def patch_setup_py(path: Path, *, version: str, name: str = "triton") -> None:
with open(path) as f:
orig = f.read()
# Replace name
orig = check_and_replace(orig, 'name="triton",', f'name="{name}",')
# Replace version
if not expected_version:
expected_version = read_triton_version()
orig = check_and_replace(
orig, f'version="{expected_version}",', f'version="{version}",'
orig, f'version="{read_triton_version()}",', f'version="{version}",'
)
with open(path, "w") as f:
f.write(orig)
def patch_init_py(
path: Path, *, version: str, expected_version: Optional[str] = None
) -> None:
if not expected_version:
expected_version = read_triton_version()
def patch_init_py(path: Path, *, version: str) -> None:
with open(path) as f:
orig = f.read()
# Replace version
orig = check_and_replace(
orig, f"__version__ = '{expected_version}'", f'__version__ = "{version}"'
orig, f"__version__ = '{read_triton_version()}'", f'__version__ = "{version}"'
)
with open(path, "w") as f:
f.write(orig)
@ -99,7 +84,14 @@ def build_triton(
triton_repo = "https://github.com/openai/triton"
triton_pkg_name = "pytorch-triton"
check_call(["git", "clone", triton_repo], cwd=tmpdir)
check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
if release:
ver, rev, patch = version.split(".")
check_call(
["git", "checkout", f"release/{ver}.{rev}.x"], cwd=triton_basedir
)
else:
check_call(["git", "checkout", commit_hash], cwd=triton_basedir)
if build_conda:
with open(triton_basedir / "meta.yaml", "w") as meta:
print(
@ -145,7 +137,7 @@ def build_triton(
cwd=triton_basedir,
env=env,
)
conda_path = next(iter(Path(tmpdir).glob("linux-64/torchtriton*.bz2")))
conda_path = list(Path(tmpdir).glob("linux-64/torchtriton*.bz2"))[0]
shutil.copy(conda_path, Path.cwd())
return Path.cwd() / conda_path.name
@ -155,7 +147,6 @@ def build_triton(
patch_init_py(
triton_pythondir / "triton" / "__init__.py",
version=f"{version}",
expected_version=ROCM_TRITION_VERSION if build_rocm else None,
)
if build_rocm:
@ -164,7 +155,6 @@ def build_triton(
triton_pythondir / "setup.py",
name=triton_pkg_name,
version=f"{version}",
expected_version=ROCM_TRITION_VERSION,
)
check_call("scripts/amd/setup_rocm_libs.sh", cwd=triton_basedir, shell=True)
print("ROCm libraries setup for triton installation...")
@ -173,7 +163,7 @@ def build_triton(
[sys.executable, "setup.py", "bdist_wheel"], cwd=triton_pythondir, env=env
)
whl_path = next(iter((triton_pythondir / "dist").glob("*.whl")))
whl_path = list((triton_pythondir / "dist").glob("*.whl"))[0]
shutil.copy(whl_path, Path.cwd())
if build_rocm:

Binary file not shown.


@ -62,9 +62,9 @@ SUPPORTED_PERIODICAL_MODES: Dict[str, Callable[[Optional[str]], bool]] = {
}
# The link to the published list of disabled jobs
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json"
DISABLED_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/disabled-jobs.json?versionId=jbbJUxI_SSZFssBBGCU6ybH9sxHitHLY"
# and unstable jobs
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json"
UNSTABLE_JOBS_URL = "https://ossci-metrics.s3.amazonaws.com/unstable-jobs.json?versionId=hUtTalgnWb1m3AtJyVLUdu7DBrnddRkp"
# Some constants used to handle disabled and unstable jobs
JOB_NAME_SEP = "/"


@ -93,9 +93,7 @@ def get_nccl_wheel_version(arch_version: str) -> str:
requirements = map(
str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
)
return next(x for x in requirements if x.startswith("nvidia-nccl-cu")).split("==")[
1
]
return [x for x in requirements if x.startswith("nvidia-nccl-cu")][0].split("==")[1]
def validate_nccl_dep_consistency(arch_version: str) -> None:


@ -145,16 +145,6 @@ class GitRepo:
rc = self._run_git("rev-list", revision_range, "--", ".").strip()
return rc.split("\n") if len(rc) > 0 else []
def branches_containing_ref(
self, ref: str, *, include_remote: bool = True
) -> List[str]:
rc = (
self._run_git("branch", "--remote", "--contains", ref)
if include_remote
else self._run_git("branch", "--contains", ref)
)
return [x.strip() for x in rc.split("\n") if x.strip()] if len(rc) > 0 else []
def current_branch(self) -> str:
return self._run_git("symbolic-ref", "--short", "HEAD").strip()
@ -397,28 +387,13 @@ def _shasum(value: str) -> str:
return m.hexdigest()
def is_commit_hash(ref: str) -> bool:
"True if ref is hexadecimal number, else false"
try:
int(ref, 16)
except ValueError:
return False
return True
def are_ghstack_branches_in_sync(
repo: GitRepo, head_ref: str, base_ref: Optional[str] = None
) -> bool:
def are_ghstack_branches_in_sync(repo: GitRepo, head_ref: str) -> bool:
"""Checks that diff between base and head is the same as diff between orig and its parent"""
orig_ref = re.sub(r"/head$", "/orig", head_ref)
if base_ref is None:
base_ref = re.sub(r"/head$", "/base", head_ref)
base_ref = re.sub(r"/head$", "/base", head_ref)
orig_diff_sha = _shasum(repo.diff(f"{repo.remote}/{orig_ref}"))
head_diff_sha = _shasum(
repo.diff(
base_ref if is_commit_hash(base_ref) else f"{repo.remote}/{base_ref}",
f"{repo.remote}/{head_ref}",
)
repo.diff(f"{repo.remote}/{base_ref}", f"{repo.remote}/{head_ref}")
)
return orig_diff_sha == head_diff_sha

Binary file not shown.


@ -44,10 +44,6 @@ def get_last_page_num_from_header(header: Any) -> int:
# Link info looks like: <https://api.github.com/repositories/65600975/labels?per_page=100&page=2>;
# rel="next", <https://api.github.com/repositories/65600975/labels?per_page=100&page=3>; rel="last"
link_info = header["link"]
# The docs do not specify that it should be present for projects with just a few labels,
# and per https://github.com/malfet/deleteme/actions/runs/7334565243/job/19971396887 that is not always the case
if link_info is None:
return 1
prefix = "&page="
suffix = ">;"
return int(


@ -32,6 +32,7 @@ from trymerge import (
main as trymerge_main,
MandatoryChecksMissingError,
MergeRule,
PostCommentError,
RE_GHSTACK_DESC,
read_merge_rules,
remove_job_name_suffix,
@ -221,31 +222,6 @@ def mocked_read_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule
]
def mocked_read_merge_rules_approvers(
repo: Any, org: str, project: str
) -> List[MergeRule]:
return [
MergeRule(
name="Core Reviewers",
patterns=["*"],
approved_by=["1", "2", "3", "4", "5", "6"],
mandatory_checks_name=[
"Lint",
"pull",
],
),
MergeRule(
name="Core Maintainers",
patterns=["*"],
approved_by=["1", "2", "malfet"],
mandatory_checks_name=[
"Lint",
"pull",
],
),
]
def mocked_read_merge_rules_raise(repo: Any, org: str, project: str) -> List[MergeRule]:
raise RuntimeError("testing")
@ -311,27 +287,6 @@ class TestTryMerge(TestCase):
RuntimeError, "testing", lambda: find_matching_merge_rule(pr, repo)
)
@mock.patch(
"trymerge.read_merge_rules", side_effect=mocked_read_merge_rules_approvers
)
def test_match_rules_approvers(self, *args: Any) -> None:
"Tests that PR has the necessary approvers"
repo = DummyGitRepo()
pr = GitHubPR("pytorch", "pytorch", 115329)
# Test that all potential approvers across all rules are listed if the
# PR doesn't have one of them
for mock_rule in ["Core Reviewers", "Core Maintainers"]:
self.assertRaisesRegex(
RuntimeError,
mock_rule,
lambda: find_matching_merge_rule(pr, repo),
)
pr = GitHubPR("pytorch", "pytorch", 115495)
# Test that PR with the correct approvers doesn't raise any exception
self.assertTrue(find_matching_merge_rule(pr, repo) is not None)
@mock.patch("trymerge.read_merge_rules", side_effect=mocked_read_merge_rules)
def test_lint_fails(self, *args: Any) -> None:
"Tests that PR fails mandatory lint check"
@ -515,6 +470,20 @@ class TestTryMerge(TestCase):
self.assertEqual(len(changed_files), pr.get_changed_files_count())
def test_revert_codev_fails(self, *args: Any) -> None:
pr = GitHubPR("pytorch", "pytorch", 91340)
class GitRepoCoDev(DummyGitRepo):
def commit_message(self, ref: str) -> str:
return pr.get_body()
repo = GitRepoCoDev()
self.assertRaisesRegex(
PostCommentError,
"landed via phabricator",
lambda: validate_revert(repo, pr, comment_id=1372496233),
)
def test_revert_codev_abandoned_diff_succeeds(self, *args: Any) -> None:
pr = GitHubPR("pytorch", "pytorch", 100652)


@ -20,18 +20,7 @@ from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import (
Any,
Callable,
cast,
Dict,
Iterable,
List,
NamedTuple,
Optional,
Pattern,
Tuple,
)
from typing import Any, Callable, cast, Dict, List, NamedTuple, Optional, Pattern, Tuple
from warnings import warn
import yaml
@ -623,14 +612,19 @@ def can_skip_internal_checks(pr: "GitHubPR", comment_id: Optional[int] = None) -
return comment.author_login == "facebook-github-bot"
def _revlist_to_prs(
repo: GitRepo,
pr: "GitHubPR",
rev_list: Iterable[str],
should_skip: Optional[Callable[[int, "GitHubPR"], bool]] = None,
def get_ghstack_prs(
repo: GitRepo, pr: "GitHubPR", open_only: bool = True
) -> List[Tuple["GitHubPR", str]]:
rc: List[Tuple[GitHubPR, str]] = []
for idx, rev in enumerate(rev_list):
"""
Get the PRs in the stack that are below this PR (inclusive). Throws error if any of the open PRs are out of sync.
@:param open_only: Only return open PRs
"""
assert pr.is_ghstack_pr()
entire_stack: List[Tuple[GitHubPR, str]] = []
# For ghstack, cherry-pick commits based from origin
orig_ref = f"{repo.remote}/{re.sub(r'/head$', '/orig', pr.head_ref())}"
rev_list = repo.revlist(f"{pr.default_branch()}..{orig_ref}")
for idx, rev in enumerate(reversed(rev_list)):
msg = repo.commit_message(rev)
m = RE_PULL_REQUEST_RESOLVED.search(msg)
if m is None:
@ -641,48 +635,25 @@ def _revlist_to_prs(
raise RuntimeError(
f"PR {m.group('number')} resolved to wrong owner/repo pair"
)
pr_num = int(m.group("number"))
candidate = GitHubPR(pr.org, pr.project, pr_num) if pr_num != pr.pr_num else pr
if should_skip is not None and should_skip(idx, candidate):
continue
rc.append((candidate, rev))
return rc
def get_ghstack_prs(
repo: GitRepo, pr: "GitHubPR", open_only: bool = True
) -> List[Tuple["GitHubPR", str]]:
"""
Get the PRs in the stack that are below this PR (inclusive). Throws error if any of the open PRs are out of sync.
@:param open_only: Only return open PRs
"""
# For ghstack, cherry-pick commits based from origin
orig_ref = f"{repo.remote}/{pr.get_ghstack_orig_ref()}"
rev_list = repo.revlist(f"{pr.default_branch()}..{orig_ref}")
def skip_func(idx: int, candidate: "GitHubPR") -> bool:
if not open_only or not candidate.is_closed():
return False
print(
f"Skipping {idx+1} of {len(rev_list)} PR (#{candidate.pr_num}) as its already been merged"
)
return True
assert pr.is_ghstack_pr()
entire_stack = _revlist_to_prs(repo, pr, reversed(rev_list), skip_func)
stacked_pr_num = int(m.group("number"))
if stacked_pr_num != pr.pr_num:
stacked_pr = GitHubPR(pr.org, pr.project, stacked_pr_num)
if open_only and stacked_pr.is_closed():
print(
f"Skipping {idx+1} of {len(rev_list)} PR (#{stacked_pr_num}) as its already been merged"
)
continue
entire_stack.append((stacked_pr, rev))
else:
entire_stack.append((pr, rev))
for stacked_pr, rev in entire_stack:
if stacked_pr.is_closed():
continue
base_ref = stacked_pr.base_ref()
if base_ref == pr.default_branch():
base_ref = repo.get_merge_base(
f"{repo.remote}/{base_ref}", f"{repo.remote}/{stacked_pr.head_ref()}"
)
if not are_ghstack_branches_in_sync(repo, stacked_pr.head_ref(), base_ref):
if not are_ghstack_branches_in_sync(repo, stacked_pr.head_ref()):
raise RuntimeError(
f"PR {stacked_pr.pr_num} is out of sync with the corresponding revision {rev} on "
+ f"branch {stacked_pr.get_ghstack_orig_ref()} that would be merged into {stacked_pr.default_branch()}. "
+ f"branch {orig_ref} that would be merged into main. "
+ "This usually happens because there is a non ghstack change in the PR. "
+ f"Please sync them and try again (ex. make the changes on {orig_ref} and run ghstack)."
)
@ -723,10 +694,6 @@ class GitHubPR:
def is_ghstack_pr(self) -> bool:
return RE_GHSTACK_HEAD_REF.match(self.head_ref()) is not None
def get_ghstack_orig_ref(self) -> str:
assert self.is_ghstack_pr()
return re.sub(r"/head$", "/orig", self.head_ref())
def is_base_repo_private(self) -> bool:
return bool(self.info["baseRepository"]["isPrivate"])
@ -1321,9 +1288,6 @@ def find_matching_merge_rule(
ignore_current_checks=ignore_current_checks,
)
# This keeps the list of all approvers that could stamp the change
all_rule_approvers = {}
# PRs can fail multiple merge rules, but it only needs to pass one rule to be approved.
# If it fails all rules, we need to find the rule that it came closest to passing and report
# that to the dev.
@ -1367,31 +1331,24 @@ def find_matching_merge_rule(
continue
# Does the PR have the required approvals for this rule?
rule_approvers = set()
rule_approvers_set = set()
for approver in rule.approved_by:
if "/" in approver:
org, name = approver.split("/")
rule_approvers.update(gh_get_team_members(org, name))
rule_approvers_set.update(gh_get_team_members(org, name))
else:
rule_approvers.add(approver)
approvers_intersection = approved_by.intersection(rule_approvers)
rule_approvers_set.add(approver)
approvers_intersection = approved_by.intersection(rule_approvers_set)
# If rule requires approvers but they aren't the ones that reviewed PR
if len(approvers_intersection) == 0 and len(rule_approvers) > 0:
# Less than or equal is intentionally used here to gather all potential
# approvers
if reject_reason_score <= 10000:
if len(approvers_intersection) == 0 and len(rule_approvers_set) > 0:
if reject_reason_score < 10000:
reject_reason_score = 10000
all_rule_approvers[rule.name] = rule.approved_by
# Prepare the reject reason
all_rule_approvers_msg = [
f"- {name} ({', '.join(approved_by[:5])}{', ...' if len(approved_by) > 5 else ''})"
for name, approved_by in all_rule_approvers.items()
]
reject_reason = "Approvers from one of the following sets are needed:\n"
reject_reason += "\n".join(all_rule_approvers_msg)
reject_reason = "\n".join(
(
"Approval needed from one of the following:",
f"{', '.join(list(rule_approvers_set)[:5])}{', ...' if len(rule_approvers_set) > 5 else ''}",
)
)
continue
# Does the PR pass the checks required by this rule?
@ -1765,16 +1722,6 @@ def filter_checks_with_lambda(
return [check for check in checks.values() if status_filter(check.status)]
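A hypothetical usage sketch of the filtering above, with stand-in check objects that expose a .status attribute; all names here are illustrative only:

from types import SimpleNamespace

checks = {
    "lint": SimpleNamespace(status="SUCCESS"),
    "build": SimpleNamespace(status="FAILURE"),
}
# Same list comprehension as above, with a filter that keeps successful checks
status_filter = lambda status: status == "SUCCESS"
passing = [check for check in checks.values() if status_filter(check.status)]
assert len(passing) == 1 and passing[0].status == "SUCCESS"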
def get_pr_commit_sha(repo: GitRepo, pr: GitHubPR) -> str:
commit_sha = pr.get_merge_commit()
if commit_sha is not None:
return commit_sha
commits = repo.commits_resolving_gh_pr(pr.pr_num)
if len(commits) == 0:
raise PostCommentError("Can't find any commits resolving PR")
return commits[0]
def validate_revert(
repo: GitRepo, pr: GitHubPR, *, comment_id: Optional[int] = None
) -> Tuple[str, str]:
@ -1796,98 +1743,32 @@ def validate_revert(
f"Will not revert as @{author_login} is not one of "
f"[{', '.join(allowed_reverters)}], but instead is {author_association}."
)
skip_internal_checks = can_skip_internal_checks(pr, comment_id)
# Ignore associated diff if PR does not have internal changes
if pr.has_no_connected_diff():
skip_internal_checks = True
# Raises exception if matching rule is not found, but ignores all status checks
find_matching_merge_rule(
pr, repo, skip_mandatory_checks=True, skip_internal_checks=True
pr, repo, skip_mandatory_checks=True, skip_internal_checks=skip_internal_checks
)
commit_sha = get_pr_commit_sha(repo, pr)
commit_sha = pr.get_merge_commit()
if commit_sha is None:
commits = repo.commits_resolving_gh_pr(pr.pr_num)
if len(commits) == 0:
raise PostCommentError("Can't find any commits resolving PR")
commit_sha = commits[0]
msg = repo.commit_message(commit_sha)
rc = RE_DIFF_REV.search(msg)
if rc is not None and not skip_internal_checks:
raise PostCommentError(
f"Can't revert PR that was landed via phabricator as {rc.group(1)}. "
+ "Please revert by going to the internal diff and clicking Unland."
)
return (author_login, commit_sha)
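A minimal sketch of the phabricator-landed check above; the regex approximates what RE_DIFF_REV matches (a "Differential Revision:" trailer), and the diff number is hypothetical:

import re

# Approximation of RE_DIFF_REV; the real pattern is defined elsewhere in this script
DIFF_REV = re.compile(r"^Differential Revision:\s*(D[0-9]+)", re.MULTILINE)

msg = "Some change\n\nDifferential Revision: D12345678"  # hypothetical commit message
rc = DIFF_REV.search(msg)
assert rc is not None and rc.group(1) == "D12345678"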
def get_ghstack_dependent_prs(
repo: GitRepo, pr: GitHubPR, only_closed: bool = True
) -> List[Tuple[str, GitHubPR]]:
"""
Get the PRs in the stack that are above this PR (inclusive).
Throws an error if the stack has branched or the original branches are gone
"""
assert pr.is_ghstack_pr()
orig_ref = f"{repo.remote}/{pr.get_ghstack_orig_ref()}"
rev_list = repo.revlist(f"{pr.default_branch()}..{orig_ref}")
if len(rev_list) == 0:
raise RuntimeError(
f"PR {pr.pr_num} does not have any revisions associated with it"
)
skip_len = len(rev_list) - 1
for branch in repo.branches_containing_ref(orig_ref):
candidate = repo.revlist(f"{pr.default_branch()}..{branch}")
# Pick longest candidate
if len(candidate) > len(rev_list):
candidate, rev_list = rev_list, candidate
# Validate that candidate always ends rev-list
if rev_list[-len(candidate) :] != candidate:
raise RuntimeError(
f"Branch {branch} revlist {', '.join(candidate)} is not a subset of {', '.join(rev_list)}"
)
# Remove commits original PR depends on
if skip_len > 0:
rev_list = rev_list[:-skip_len]
rc: List[Tuple[str, GitHubPR]] = []
for pr_, sha in _revlist_to_prs(repo, pr, rev_list):
if not pr_.is_closed():
if not only_closed:
rc.append(("", pr_))
continue
commit_sha = get_pr_commit_sha(repo, pr_)
rc.append((commit_sha, pr_))
return rc
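A toy walk-through of the rev-list bookkeeping above, assuming git rev-list's newest-first ordering and hypothetical commit ids: the longest branch becomes rev_list, the shorter list must be its suffix, and skip_len trims away the commits this PR itself depends on, leaving the PR and everything stacked above it:

rev_list = ["c3", "c2", "c1"]               # this PR's stack, newest first (hypothetical shas)
skip_len = len(rev_list) - 1                # commits below this PR
candidate = ["c5", "c4", "c3", "c2", "c1"]  # a longer branch built on the same stack
if len(candidate) > len(rev_list):
    candidate, rev_list = rev_list, candidate
assert rev_list[-len(candidate):] == candidate     # shared-history check
assert rev_list[:-skip_len] == ["c5", "c4", "c3"]  # this PR and the PRs above it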
def do_revert_prs(
repo: GitRepo,
shas_and_prs: List[Tuple[str, GitHubPR]],
*,
author_login: str,
extra_msg: str = "",
skip_internal_checks: bool = False,
dry_run: bool = False,
) -> None:
# Prepare and push revert commits
commit_shas: List[str] = []
for commit_sha, pr in shas_and_prs:
revert_msg = f"\nReverted {pr.get_pr_url()} on behalf of {prefix_with_github_url(author_login)}"
revert_msg += extra_msg
repo.checkout(pr.default_branch())
repo.revert(commit_sha)
msg = repo.commit_message("HEAD")
msg = re.sub(RE_PULL_REQUEST_RESOLVED, "", msg)
msg += revert_msg
repo.amend_commit_message(msg)
repo.push(shas_and_prs[0][1].default_branch(), dry_run)
# Comment/reopen PRs
for commit_sha, pr in shas_and_prs:
revert_message = (
f"@{pr.get_pr_creator_login()} your PR has been successfully reverted."
)
if (
pr.has_internal_changes()
and not pr.has_no_connected_diff()
and not skip_internal_checks
):
revert_message += "\n:warning: This PR might contain internal changes"
revert_message += "\ncc: @pytorch/pytorch-dev-infra"
gh_post_pr_comment(
pr.org, pr.project, pr.pr_num, revert_message, dry_run=dry_run
)
if not dry_run:
pr.add_numbered_label("reverted")
gh_post_commit_comment(pr.org, pr.project, commit_sha, revert_msg)
gh_update_pr_state(pr.org, pr.project, pr.pr_num)
def try_revert(
repo: GitRepo,
pr: GitHubPR,
@ -1896,37 +1777,34 @@ def try_revert(
comment_id: Optional[int] = None,
reason: Optional[str] = None,
) -> None:
def post_comment(msg: str) -> None:
gh_post_pr_comment(pr.org, pr.project, pr.pr_num, msg, dry_run=dry_run)
try:
author_login, commit_sha = validate_revert(repo, pr, comment_id=comment_id)
except PostCommentError as e:
gh_post_pr_comment(pr.org, pr.project, pr.pr_num, str(e), dry_run=dry_run)
return
extra_msg = f" due to {reason}" if reason is not None else ""
extra_msg += (
return post_comment(str(e))
revert_msg = f"\nReverted {pr.get_pr_url()} on behalf of {prefix_with_github_url(author_login)}"
revert_msg += f" due to {reason}" if reason is not None else ""
revert_msg += (
f" ([comment]({pr.get_comment_by_id(comment_id).url}))\n"
if comment_id is not None
else "\n"
)
shas_and_prs = [(commit_sha, pr)]
if pr.is_ghstack_pr():
try:
shas_and_prs = get_ghstack_dependent_prs(repo, pr)
prs_to_revert = " ".join([t[1].get_pr_url() for t in shas_and_prs])
print(f"About to stack of PRs: {prs_to_revert}")
except Exception as e:
print(
f"Failed to fetch dependent PRs: {str(e)}, fall over to single revert"
)
do_revert_prs(
repo,
shas_and_prs,
author_login=author_login,
extra_msg=extra_msg,
dry_run=dry_run,
skip_internal_checks=can_skip_internal_checks(pr, comment_id),
repo.checkout(pr.default_branch())
repo.revert(commit_sha)
msg = repo.commit_message("HEAD")
msg = re.sub(RE_PULL_REQUEST_RESOLVED, "", msg)
msg += revert_msg
repo.amend_commit_message(msg)
repo.push(pr.default_branch(), dry_run)
post_comment(
f"@{pr.get_pr_creator_login()} your PR has been successfully reverted."
)
if not dry_run:
pr.add_numbered_label("reverted")
gh_post_commit_comment(pr.org, pr.project, commit_sha, revert_msg)
gh_update_pr_state(pr.org, pr.project, pr.pr_num)
def prefix_with_github_url(suffix_str: str) -> str:


@ -8,7 +8,7 @@
# NOTE: If testing pytorch/builder changes you can change this variable to change what pytorch/builder reference
# the binary builds will check out
{%- set builder_repo = "pytorch/builder" -%}
{%- set builder_branch = "main" -%}
{%- set builder_branch = "release/2.2" -%}
{%- macro concurrency(build_environment) -%}
concurrency:
@ -36,7 +36,7 @@ concurrency:
{%- macro setup_ec2_windows() -%}
!{{ display_ec2_information() }}
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
continue-on-error: true
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
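The !{{ ... }} markers in these template files are Jinja-style variable delimiters, rendered into concrete workflow YAML by a generator script. A minimal rendering sketch, assuming the jinja2 package and custom delimiters (the generator's exact configuration may differ):

import jinja2

env = jinja2.Environment(
    variable_start_string="!{{",  # matches the !{{ ... }} markers in the templates
    variable_end_string="}}",
    undefined=jinja2.StrictUndefined,
)
template = env.from_string("ref: !{{ builder_branch }}")
print(template.render(builder_branch="release/2.2"))  # -> ref: release/2.2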


@ -7,6 +7,7 @@
name: !{{ build_environment }}
{%- endblock %}
on:
push:
{%- if branches == "nightly" %}
@ -99,13 +100,13 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ runner.temp }}/artifacts/"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: ROCm set GPU_FLAG
run: |
echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: !{{ config["container_image"] }}
- name: Test Pytorch binary


@ -81,8 +81,8 @@ jobs:
elif [ -d "/Applications/Xcode_13.3.1.app" ]; then
echo "DEVELOPER_DIR=/Applications/Xcode_13.3.1.app/Contents/Developer" >> "${GITHUB_ENV}"
fi
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: Install sccache (only for non-forked PRs, and pushes to trunk)
uses: nick-fields/retry@v2.8.2
if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}


@ -53,6 +53,9 @@
{%- macro upload_binaries(config, is_windows=False, has_test=True, use_s3=True) -%}
!{{ config["build_name"] }}-upload: # Uploading
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
id-token: write
contents: read
{%- if has_test %}
needs: !{{ config["build_name"] }}-test
{%- else %}
@ -65,8 +68,6 @@
{%- endif %}
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
aws-pytorch-uploader-access-key-id: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
aws-pytorch-uploader-secret-access-key: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}
conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
uses: ./.github/workflows/_binary-upload.yml


@ -65,8 +65,8 @@ jobs:
steps:
!{{ common.setup_ec2_windows() }}
!{{ set_runner_specific_vars() }}
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: Populate binary env
shell: bash
run: |
@ -105,8 +105,8 @@ jobs:
with:
name: !{{ config["build_name"] }}
path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
!{{ common.checkout(deep_clone=False, directory="pytorch") }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch) }}
!{{ common.checkout(deep_clone=False, directory="pytorch", checkout_pr_head=False) }}
!{{ common.checkout(deep_clone=False, directory="builder", repository=common.builder_repo, branch=common.builder_branch, checkout_pr_head=False) }}
- name: Populate binary env
shell: bash
run: |


@ -29,7 +29,6 @@ env:
jobs:
filter:
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -37,7 +36,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
fetch-depth: 1
submodules: false
@ -59,25 +58,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -141,5 +140,5 @@ jobs:
if: always()
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()


@ -29,7 +29,6 @@ env:
jobs:
filter:
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -37,7 +36,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
fetch-depth: 1
submodules: false
@ -59,25 +58,25 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -186,5 +185,5 @@ jobs:
if: always()
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()


@ -33,7 +33,6 @@ env:
jobs:
filter:
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -42,7 +41,7 @@ jobs:
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
fetch-depth: 1
submodules: false
@ -64,30 +63,30 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
with:
docker-image-name: ${{ inputs.docker-image-name }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.2
if: ${{ inputs.cuda-version != 'cpu' }}
- name: Output disk space left
@ -122,6 +121,8 @@ jobs:
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
# TODO duplicated
AWS_DEFAULT_REGION: us-east-1
@ -158,6 +159,8 @@ jobs:
-e TORCH_CUDA_ARCH_LIST \
-e OUR_GITHUB_JOB_ID \
-e CUDA_VERSION \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
@ -196,5 +199,5 @@ jobs:
file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()


@ -139,13 +139,13 @@ jobs:
run: env
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' }}
@ -173,7 +173,6 @@ jobs:
- name: Checkout PyTorch to pytorch dir
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
quiet-checkout: true
@ -187,7 +186,7 @@ jobs:
- name: Checkout pytorch/builder to builder dir
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.2
submodules: recursive
repository: pytorch/builder
path: builder
@ -213,7 +212,7 @@ jobs:
- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}
@ -270,7 +269,7 @@ jobs:
- name: Teardown Linux
if: always()
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
- name: Chown workspace
if: always()


@ -127,14 +127,14 @@ jobs:
} >> "${GITHUB_ENV} }}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
continue-on-error: true
with:
github-secret: ${{ secrets.github-token }}
# Setup the environment
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
no-sudo: ${{ inputs.build_environment == 'linux-aarch64-binary-manywheel' }}
@ -155,7 +155,6 @@ jobs:
- name: Checkout PyTorch to pytorch dir
uses: malfet/checkout@silent-checkout
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
@ -168,7 +167,7 @@ jobs:
- name: Checkout pytorch/builder to builder dir
uses: malfet/checkout@silent-checkout
with:
ref: main
ref: release/2.2
submodules: recursive
repository: pytorch/builder
path: builder
@ -199,12 +198,12 @@ jobs:
path: "${{ runner.temp }}/artifacts/"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.2
if: ${{ inputs.GPU_ARCH_TYPE == 'cuda' && steps.filter.outputs.is-test-matrix-empty == 'False' }}
- name: Pull Docker image
if: ${{ steps.filter.outputs.is-test-matrix-empty == 'False' }}
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ inputs.DOCKER_IMAGE }}
@ -214,7 +213,7 @@ jobs:
- name: Teardown Linux
if: always()
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
- name: Chown workspace
if: always()


@ -59,18 +59,13 @@ on:
github-token:
required: true
description: Github Token
aws-pytorch-uploader-access-key-id:
required: true
description: AWS access key id
aws-pytorch-uploader-secret-access-key:
required: true
description: AWS secret access key
conda-pytorchbot-token:
required: true
description: Conda PyTorchBot token
conda-pytorchbot-token-test:
required: true
description: Conda PyTorchBot token
jobs:
upload:
runs-on: ubuntu-22.04
@ -100,10 +95,24 @@ jobs:
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
no-sudo: true
- name: Configure AWS credentials(PyTorch account) for nightly
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/nightly' }}
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
aws-region: us-east-1
- name: Configure AWS credentials(PyTorch account) for RC builds
if: ${{ github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/')) }}
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
aws-region: us-east-1
- name: Download Build Artifacts
id: download-artifacts
# NB: When the previous build job is skipped, there won't be any artifacts and
@ -135,8 +144,6 @@ jobs:
PKG_DIR: "${{ runner.temp }}/artifacts"
UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
# When running these on pull_request events these should be blank
AWS_ACCESS_KEY_ID: ${{ secrets.aws-pytorch-uploader-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.aws-pytorch-uploader-secret-access-key }}
CONDA_PYTORCHBOT_TOKEN: ${{ secrets.conda-pytorchbot-token }}
CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.conda-pytorchbot-token-test }}
BUILD_NAME: ${{ inputs.build_name }}


@ -15,7 +15,6 @@ defaults:
jobs:
filter:
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -23,7 +22,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
fetch-depth: 1
submodules: false
@ -44,7 +43,7 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Set up JDK 8
uses: actions/setup-java@v3
@ -53,7 +52,7 @@ jobs:
distribution: 'temurin'
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.2
with:
python-version: 3.8
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}


@ -66,7 +66,7 @@ jobs:
name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@ -77,19 +77,19 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -187,5 +187,5 @@ jobs:
s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()


@ -38,7 +38,6 @@ env:
jobs:
filter:
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -46,7 +45,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
fetch-depth: 1
submodules: false
@ -80,7 +79,7 @@ jobs:
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Populate CI build options
shell: bash
@ -102,7 +101,7 @@ jobs:
brew install libtool
- name: Setup miniconda for iOS
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.2
with:
python-version: "3.9"
environment-file: .github/requirements/conda-env-iOS.txt


@ -73,7 +73,7 @@ jobs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -82,14 +82,14 @@ jobs:
# checkout because when we run this action we don't *have* a local
# checkout. In other cases you should prefer a local checkout.
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
with:
docker-image-name: ${{ inputs.docker-image-name }}
@ -103,7 +103,7 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -209,5 +209,5 @@ jobs:
path: sccache-stats-*.json
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()


@ -57,7 +57,7 @@ jobs:
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
if: ${{ !contains(matrix.runner, 'gcp.a100') }}
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
@ -66,14 +66,14 @@ jobs:
docker exec -it $(docker container ps --format '{{.ID}}') bash
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
with:
docker-image-name: ${{ inputs.docker-image }}
@ -87,13 +87,13 @@ jobs:
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
uses: pytorch/test-infra/.github/actions/setup-nvidia@release/2.2
if: contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu')
- name: Lock NVIDIA A100 40GB Frequency
@ -164,6 +164,8 @@ jobs:
BRANCH: ${{ steps.parse-ref.outputs.branch }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
@ -207,7 +209,6 @@ jobs:
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e JOB_ID \
-e JOB_NAME \
-e BASE_SHA \
-e BRANCH \
-e SHA1 \
@ -218,6 +219,8 @@ jobs:
-e NUM_TEST_SHARDS \
-e REENABLED_ISSUES \
-e CONTINUE_THROUGH_ERROR \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
@ -297,7 +300,7 @@ jobs:
path: ./**/core.[1-9]*
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()
# NB: We are currently having an intermittent GPU-related issue on G5 runners with


@ -71,11 +71,11 @@ jobs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
steps:
- name: Clean up disk space before running MacOS workflow
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.2
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Set xcode version
env:
@ -87,7 +87,7 @@ jobs:
- name: Setup miniconda
if: inputs.environment-file == ''
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.2
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -97,7 +97,7 @@ jobs:
# environment even though the arch is x86-64
- name: Setup miniconda using the provided environment file
if: inputs.environment-file != ''
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.2
with:
python-version: ${{ inputs.python-version }}
environment-file: ${{ inputs.environment-file }}
@ -207,4 +207,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.2


@ -28,7 +28,6 @@ on:
jobs:
filter:
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -37,7 +36,7 @@ jobs:
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
fetch-depth: 1
submodules: false
@ -59,6 +58,7 @@ jobs:
runs-on: ${{ matrix.runner }}
steps:
- name: Print runner OS/HW info
shell: arch -arch arm64 bash {0}
run: |
sysctl machdep.cpu.brand_string kern.osproductversion
@ -69,6 +69,7 @@ jobs:
quiet-checkout: true
- name: Clean checkout
shell: arch -arch arm64 bash {0}
run: |
git clean -fxd
@ -79,7 +80,7 @@ jobs:
use-gha: true
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.2
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -94,9 +95,12 @@ jobs:
ENV_NAME: conda-test-env-${{ github.run_id }}
PY_VERS: 3.9
PR_BODY: ${{ github.event.pull_request.body }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
CONTINUE_THROUGH_ERROR: ${{ needs.filter.outputs.keep-going }}
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
shell: arch -arch arm64 bash {0}
run: |
# shellcheck disable=SC1090
set -ex
@ -154,4 +158,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.2


@ -57,11 +57,9 @@ jobs:
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
PR_BODY: ${{ github.event.pull_request.body }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
steps:
- name: Print runner OS/HW info
run: |
sysctl machdep.cpu.brand_string kern.osproductversion
- name: Clean up leftover processes on MacOS pet runner
continue-on-error: true
run: |
@ -78,12 +76,14 @@ jobs:
rm -rf "${dir}"
done
- name: Clean up disk space before running MacOS workflow
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.2
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Download build artifacts
uses: ./.github/actions/download-build-artifacts
@ -92,7 +92,7 @@ jobs:
use-gha: true
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.2
with:
python-version: ${{ inputs.python-version }}
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}
@ -218,4 +218,4 @@ jobs:
- name: Clean up disk space
if: always()
continue-on-error: true
uses: pytorch/test-infra/.github/actions/check-disk-space@main
uses: pytorch/test-infra/.github/actions/check-disk-space@release/2.2


@ -42,6 +42,10 @@ on:
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
permissions:
id-token: write
contents: read
jobs:
test:
# Don't run on forked repos or empty test matrix
@ -54,21 +58,32 @@ jobs:
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
no-sudo: true
- name: Setup ROCm
uses: ./.github/actions/setup-rocm
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_and_ecr_read_only
aws-region: us-east-1
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
uses: pytorch/test-infra/.github/actions/calculate-docker-image@release/2.2
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
@ -131,6 +146,8 @@ jobs:
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
@ -170,7 +187,6 @@ jobs:
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e JOB_ID \
-e JOB_NAME \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
@ -179,6 +195,8 @@ jobs:
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e REENABLED_ISSUES \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \


@ -15,7 +15,6 @@ defaults:
jobs:
filter:
if: github.repository_owner == 'pytorch'
runs-on: [self-hosted, linux.large]
outputs:
test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -23,7 +22,7 @@ jobs:
keep-going: ${{ steps.filter.outputs.keep-going }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
fetch-depth: 1
submodules: false
@ -54,10 +53,10 @@ jobs:
SUPPORT_ABI: '${{ matrix.support_abi }}'
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
- name: Setup miniconda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
uses: pytorch/test-infra/.github/actions/setup-miniconda@release/2.2
with:
python-version: 3.8
environment-file: .github/requirements/conda-env-${{ runner.os }}-${{ runner.arch }}.txt


@ -60,10 +60,10 @@ jobs:
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
uses: pytorch/test-infra/.github/actions/cleanup-runner@main
uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.2
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@ -78,7 +78,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
no-sudo: true


@ -54,10 +54,10 @@ jobs:
git config --global core.fsmonitor false
- name: Clean up leftover processes on non-ephemeral Windows runner
uses: pytorch/test-infra/.github/actions/cleanup-runner@main
uses: pytorch/test-infra/.github/actions/cleanup-runner@release/2.2
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@main
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
instructions: |
@ -73,7 +73,7 @@ jobs:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
no-sudo: true
@ -139,6 +139,8 @@ jobs:
USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
INSTALL_WINDOWS_SDK: 1
PYTHON_VERSION: 3.8
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VC_PRODUCT: "BuildTools"
VC_VERSION: ""


@ -1,269 +0,0 @@
# TODO: this looks sort of similar to _linux-test, but there are like a dozen
# places where you would have to insert an if statement. Probably it's better to
# just use a different workflow altogether
name: xpu-test
on:
workflow_call:
inputs:
build-environment:
required: true
type: string
description: Top-level label for what's being built/tested.
test-matrix:
required: true
type: string
description: JSON description of what test configs to run.
docker-image:
required: true
type: string
description: Docker image to run in.
sync-tag:
required: false
type: string
default: ""
description: |
If this is set, our linter will use this to make sure that every other
job with the same `sync-tag` is identical.
timeout-minutes:
required: false
type: number
default: 300
description: |
Set the maximum time (in minutes) the workflow should take to finish
tests-to-include:
required: false
type: string
default: ""
description: |
List of tests to include (empty string implies default list)
env:
GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
permissions:
id-token: write
contents: read
jobs:
test:
# Don't run on forked repos or empty test matrix
if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
strategy:
matrix: ${{ fromJSON(inputs.test-matrix) }}
fail-fast: false
timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
runs-on: ${{ matrix.runner }}
steps:
# [see note: pytorch repo ref]
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
- name: Setup XPU
uses: ./.github/actions/setup-xpu
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v1.7.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_pytorch_artifacts
aws-region: us-east-1
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: ${{ inputs.docker-image }}
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Start monitoring script
id: monitor-script
shell: bash
continue-on-error: true
run: |
python3 -m pip install psutil==5.9.1 nvidia-ml-py==11.525.84
python3 -m tools.stats.monitor > usage_log.txt 2>&1 &
echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
- name: Download build artifacts
uses: ./.github/actions/download-build-artifacts
with:
name: ${{ inputs.build-environment }}
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Check for keep-going label and re-enabled test issues
# This uses the filter-test-configs action because it conveniently
# checks for labels and re-enabled test issues. It does not actually do
# any filtering. All filtering is done in the build step.
id: keep-going
uses: ./.github/actions/filter-test-configs
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Set Test step time
id: test-timeout
shell: bash
env:
JOB_TIMEOUT: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
run: |
echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
- name: Test
id: test
env:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
DOCKER_IMAGE: ${{ inputs.docker-image }}
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
run: |
set -x
TEST_COMMAND=.ci/pytorch/test.sh
# detached container should get cleaned up by teardown_ec2_linux
# Used for GPU_FLAG since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e GITHUB_ACTIONS \
-e GITHUB_REPOSITORY \
-e GITHUB_WORKFLOW \
-e GITHUB_JOB \
-e GITHUB_RUN_ID \
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e JOB_ID \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e REENABLED_ISSUES \
-e PYTORCH_RETRY_TEST_CASES \
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
-e CONTINUE_THROUGH_ERROR \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
-e TESTS_TO_INCLUDE \
-e ZE_AFFINITY_MASK \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--ulimit stack=10485760:83886080 \
--ulimit core=0 \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--shm-size="8g" \
--tty \
--detach \
--name="${container_name}" \
--user jenkins \
--privileged \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}"
)
# save container name for later step
echo "CONTAINER_NAME=${container_name}" >> "$GITHUB_ENV"
# jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Save test results
if: always()
run: |
# copy test results back to the mounted workspace; this needed sudo, and the resulting permissions were correct
docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
- name: Print remaining test logs
shell: bash
if: always() && steps.test.conclusion
run: |
cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: always() && steps.monitor-script.outputs.monitor-script-pid
shell: bash
continue-on-error: true
env:
MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
run: |
kill "$MONITOR_SCRIPT_PID"
- name: Upload test artifacts
uses: ./.github/actions/upload-test-artifacts
if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
with:
use-gha: true
file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
- name: Collect backtraces from coredumps (if any)
if: always()
run: |
# shellcheck disable=SC2156
find . -iname "core.[1-9]*" -exec docker exec "${CONTAINER_NAME}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \;
- name: Stop container before exit
if: always()
run: |
# Workaround for multiple runners on same IDC node
docker stop "${{ env.CONTAINER_NAME }}"
- name: Store Core dumps on GitHub
uses: actions/upload-artifact@v3
if: failure()
with:
name: coredumps-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}
retention-days: 14
if-no-files-found: ignore
path: ./**/core.[1-9]*
- name: Teardown XPU
uses: ./.github/actions/teardown-xpu
