amend

2025-11-01 04:54:55 +08:00 · 2024-01-11 17:33:10 +00:00 · 2024-01-10 18:00:11 +00:00 · 2024-01-10 12:12:42 +00:00 · 2024-01-10 11:12:49 +00:00 · 2024-01-10 04:46:49 +00:00
1789 changed files with 81599 additions and 52182 deletions
--- a/.ci/docker/README.md
+++ b/.ci/docker/README.md
@ -19,6 +19,7 @@ See `build.sh` for valid build environments (it's the giant switch).
 * `ubuntu` -- Dockerfile for Ubuntu image for CPU build and test jobs
 * `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
 * `ubuntu-rocm` -- Dockerfile for Ubuntu image with ROCm support
+* `ubuntu-xpu` -- Dockerfile for Ubuntu image with XPU support

 ## Usage

--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -71,6 +71,8 @@ if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
  DOCKERFILE="${OS}-cuda/Dockerfile"
 elif [[ "$image" == *rocm* ]]; then
  DOCKERFILE="${OS}-rocm/Dockerfile"
+elif [[ "$image" == *xpu* ]]; then
+  DOCKERFILE="${OS}-xpu/Dockerfile"
 elif [[ "$image" == *cuda*linter* ]]; then
  # Use a separate Dockerfile for linter to keep a small image size
  DOCKERFILE="linter-cuda/Dockerfile"
@ -218,6 +220,16 @@ case "$image" in
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
+  pytorch-linux-jammy-xpu-2024.0-py3)
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=11
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    BASEKIT_VERSION=2024.0.0-49522
+    NINJA_VERSION=1.9.0
+    CONDA_CMAKE=yes
+    ;;
    pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks)
    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=11
@ -374,6 +386,7 @@ docker build \
       --build-arg "DOCS=${DOCS}" \
       --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
       --build-arg "EXECUTORCH=${EXECUTORCH}" \
+       --build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
       -f $(dirname ${DOCKERFILE})/Dockerfile \
       -t "$tmp_tag" \
       "$@" \
--- a/.ci/docker/ci_commit_pins/executorch.txt
+++ b/.ci/docker/ci_commit_pins/executorch.txt
@ -1 +1 @@
-b2f5dfe80704404298467347b8ee3ac229efed47
+663882fe7dc518c04adf3d2ee5ccb7d99f41ade4
--- a/.ci/docker/ci_commit_pins/triton.txt
+++ b/.ci/docker/ci_commit_pins/triton.txt
@ -1 +1 @@
-bcad9dabe15021c53b6a88296e9d7a210044f108
+e28a256d71f3cf2bcc7b69d6bda73a9b855e385e
--- a/.ci/docker/common/install_base.sh
+++ b/.ci/docker/common/install_base.sh
@ -61,6 +61,7 @@ install_ubuntu() {
    ${maybe_libiomp_dev} \
    libyaml-dev \
    libz-dev \
+    libjemalloc2 \
    libjpeg-dev \
    libasound2-dev \
    libsndfile-dev \
@ -74,6 +75,7 @@ install_ubuntu() {
    libtool \
    vim \
    unzip \
+    gpg-agent \
    gdb

  # Should resolve issues related to various apt package repository cert issues
--- a/.ci/docker/common/install_cudnn.sh
+++ b/.ci/docker/common/install_cudnn.sh
@ -2,8 +2,8 @@

 if [[ ${CUDNN_VERSION} == 8 ]]; then
    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-    mkdir tmp_cudnn && cd tmp_cudnn
-    CUDNN_NAME="cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive"
+    mkdir tmp_cudnn
+    pushd tmp_cudnn
    if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
@ -11,17 +11,14 @@ if [[ ${CUDNN_VERSION} == 8 ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
    else
-        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/${CUDNN_NAME}.tar.xz
+        echo "Unsupported CUDA version ${CUDA_VERSION}" >&2
+        exit 1
    fi

    tar xf ${CUDNN_NAME}.tar.xz
-    cp -a ${CUDNN_NAME}/include/* /usr/include/
    cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
-    cp -a ${CUDNN_NAME}/include/* /usr/include/x86_64-linux-gnu/
-
    cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
-    cp -a ${CUDNN_NAME}/lib/* /usr/lib/x86_64-linux-gnu/
-    cd ..
+    popd
    rm -rf tmp_cudnn
    ldconfig
 fi
--- a/.ci/docker/common/install_cusparselt.sh
+++ b/.ci/docker/common/install_cusparselt.sh
@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Install the cuSPARSELt headers and libraries into /usr/local/cuda.
+# Reads CUDA_VERSION from the environment to select the matching archive.
+
+set -ex
+
+# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
+mkdir tmp_cusparselt && cd tmp_cusparselt
+
+if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
+    CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.5.2.1-archive"
+elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
+    CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
+else
+    # Stop with a clear message; otherwise CUSPARSELT_NAME stays unset and
+    # the script only dies later when tar is asked to unpack ".tar.xz".
+    echo "Unsupported CUDA version ${CUDA_VERSION}" >&2
+    exit 1
+fi
+
+# -f: exit non-zero on an HTTP error rather than saving the error page
+curl --retry 3 -fOLs "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz"
+
+tar xf "${CUSPARSELT_NAME}.tar.xz"
+cp -a "${CUSPARSELT_NAME}"/include/* /usr/local/cuda/include/
+cp -a "${CUSPARSELT_NAME}"/lib/* /usr/local/cuda/lib64/
+cd ..
+rm -rf tmp_cusparselt
+ldconfig
--- a/.ci/docker/common/install_xpu.sh
+++ b/.ci/docker/common/install_xpu.sh
@ -0,0 +1,115 @@
+#!/bin/bash
+set -xe
+
+
+# Intel® software for general purpose GPU capabilities.
+# Refer to https://dgpu-docs.intel.com/releases/stable_647_21_20230714.html
+
+# Intel® oneAPI Base Toolkit (version 2024.0.0) has been updated to include functional and security updates.
+# Refer to https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html
+
+# Users should update to the latest version as it becomes available
+
+function install_ubuntu() {
+    apt-get update -y
+    apt-get install -y gpg-agent wget
+
+    # Set up the repository. To do this, download the key to the system keyring
+    wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
+        | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
+    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+        | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
+
+    # Add the signed entry to APT sources and configure the APT client to use the Intel repository
+    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/production/2328 unified" \
+        | tee /etc/apt/sources.list.d/intel-gpu-jammy.list
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
+        | tee /etc/apt/sources.list.d/oneAPI.list
+
+    # Update the packages list and repository index
+    apt-get update
+
+    # The xpu-smi packages
+    apt-get install -y flex bison xpu-smi
+    # Compute and Media Runtimes
+    apt-get install -y \
+        intel-opencl-icd intel-level-zero-gpu level-zero \
+        intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
+        libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
+        libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
+        mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
+    # Development Packages
+    apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
+    # Install Intel® oneAPI Base Toolkit
+    if [ -n "$BASEKIT_VERSION" ]; then
+        apt-get install intel-basekit=$BASEKIT_VERSION -y
+    else
+        apt-get install intel-basekit -y
+    fi
+
+    # Cleanup
+    apt-get autoclean && apt-get clean
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+}
+
+function install_centos() {
+    dnf install -y 'dnf-command(config-manager)'
+    dnf config-manager --add-repo \
+        https://repositories.intel.com/gpu/rhel/8.6/production/2328/unified/intel-gpu-8.6.repo
+    # To add the EPEL repository needed for DKMS
+    dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
+        # https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
+
+    # Create the YUM repository file in the /tmp directory as a normal user
+    tee > /tmp/oneAPI.repo << EOF
+[oneAPI]
+name=Intel® oneAPI repository
+baseurl=https://yum.repos.intel.com/oneapi
+enabled=1
+gpgcheck=1
+repo_gpgcheck=1
+gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+EOF
+
+    # Move the newly created oneAPI.repo file to the YUM configuration directory /etc/yum.repos.d
+    mv /tmp/oneAPI.repo /etc/yum.repos.d
+
+    # The xpu-smi packages
+    dnf install -y flex bison xpu-smi
+    # Compute and Media Runtimes
+    dnf install -y \
+        intel-opencl intel-media intel-mediasdk libmfxgen1 libvpl2\
+        level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
+        mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
+        mesa-libxatracker libvpl-tools intel-metrics-discovery \
+        intel-metrics-library intel-igc-core intel-igc-cm \
+        libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc hwinfo clinfo
+    # Development packages
+    dnf install -y --refresh \
+        intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
+        level-zero-devel
+    # Install Intel® oneAPI Base Toolkit
+    dnf install intel-basekit -y
+
+    # Cleanup
+    dnf clean all
+    rm -rf /var/cache/yum
+    rm -rf /var/lib/yum/yumdb
+    rm -rf /var/lib/yum/history
+}
+
+
+# The installation depends on the base OS
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+case "$ID" in
+    ubuntu)
+        install_ubuntu
+    ;;
+    centos)
+        install_centos
+    ;;
+    *)
+        echo "Unable to determine OS..."
+        exit 1
+    ;;
+esac
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@ -298,3 +298,8 @@ pywavelets==1.4.1
 # it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
 #Pinned versions: 1.4.1
 #test that import:
+
+lxml==5.0.0
+#Description: This is a requirement of unittest-xml-reporting
+
+# Python-3.9 binaries
--- a/.ci/docker/triton_version.txt
+++ b/.ci/docker/triton_version.txt
@ -1 +1 @@
-2.1.0
+2.2.0
--- a/.ci/docker/ubuntu-cuda/Dockerfile
+++ b/.ci/docker/ubuntu-cuda/Dockerfile
@ -142,6 +142,12 @@ COPY ./common/install_cudnn.sh install_cudnn.sh
 RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi
 RUN rm install_cudnn.sh

+# Install CUSPARSELT
+ARG CUDA_VERSION
+COPY ./common/install_cusparselt.sh install_cusparselt.sh
+RUN bash install_cusparselt.sh
+RUN rm install_cusparselt.sh
+
 # Delete /usr/local/cuda-11.X/cuda-11.X symlinks
 RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
 RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
--- a/.ci/docker/ubuntu-xpu/Dockerfile
+++ b/.ci/docker/ubuntu-xpu/Dockerfile
@ -0,0 +1,118 @@
+ARG UBUNTU_VERSION
+
+FROM ubuntu:${UBUNTU_VERSION}
+
+ARG UBUNTU_VERSION
+
+ENV DEBIAN_FRONTEND noninteractive
+
+ARG CLANG_VERSION
+
+# Install common dependencies (so that this step can be cached separately)
+COPY ./common/install_base.sh install_base.sh
+RUN bash ./install_base.sh && rm install_base.sh
+
+# Install clang
+ARG LLVMDEV
+COPY ./common/install_clang.sh install_clang.sh
+RUN bash ./install_clang.sh && rm install_clang.sh
+
+# Install user
+COPY ./common/install_user.sh install_user.sh
+RUN bash ./install_user.sh && rm install_user.sh
+
+# Install katex
+ARG KATEX
+COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
+RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
+
+# Install conda and other packages (e.g., numpy, pytest)
+ARG ANACONDA_PYTHON_VERSION
+ARG CONDA_CMAKE
+ARG DOCS
+ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
+ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
+ENV DOCS=$DOCS
+COPY requirements-ci.txt requirements-docs.txt /opt/conda/
+COPY ./common/install_conda.sh install_conda.sh
+COPY ./common/common_utils.sh common_utils.sh
+RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
+
+# Install gcc
+ARG GCC_VERSION
+COPY ./common/install_gcc.sh install_gcc.sh
+RUN bash ./install_gcc.sh && rm install_gcc.sh
+
+# Install lcov for C++ code coverage
+COPY ./common/install_lcov.sh install_lcov.sh
+RUN  bash ./install_lcov.sh && rm install_lcov.sh
+
+COPY ./common/install_openssl.sh install_openssl.sh
+RUN bash ./install_openssl.sh
+ENV OPENSSL_ROOT_DIR /opt/openssl
+ENV OPENSSL_DIR /opt/openssl
+RUN rm install_openssl.sh
+
+ARG INDUCTOR_BENCHMARKS
+COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
+COPY ./common/common_utils.sh common_utils.sh
+COPY ci_commit_pins/huggingface.txt huggingface.txt
+COPY ci_commit_pins/timm.txt timm.txt
+RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
+RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
+
+ARG TRITON
+# Install triton, this needs to be done before sccache because the latter will
+# try to reach out to S3, which docker build runners don't have access
+COPY ./common/install_triton.sh install_triton.sh
+COPY ./common/common_utils.sh common_utils.sh
+# TODO: will add triton xpu commit
+COPY ci_commit_pins/triton.txt triton.txt
+RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
+RUN rm install_triton.sh common_utils.sh triton.txt
+
+# (optional) Install database packages like LMDB and LevelDB
+ARG DB
+COPY ./common/install_db.sh install_db.sh
+RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
+RUN rm install_db.sh
+ENV INSTALLED_DB ${DB}
+
+# (optional) Install vision packages like OpenCV and ffmpeg
+ARG VISION
+COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
+RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
+RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
+ENV INSTALLED_VISION ${VISION}
+
+# Install XPU Dependencies
+ARG BASEKIT_VERSION
+COPY ./common/install_xpu.sh install_xpu.sh
+RUN bash ./install_xpu.sh && rm install_xpu.sh
+
+# (optional) Install non-default CMake version
+ARG CMAKE_VERSION
+COPY ./common/install_cmake.sh install_cmake.sh
+RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
+RUN rm install_cmake.sh
+
+# (optional) Install non-default Ninja version
+ARG NINJA_VERSION
+COPY ./common/install_ninja.sh install_ninja.sh
+RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
+RUN rm install_ninja.sh
+
+# Install ccache/sccache (do this last, so we get priority in PATH)
+COPY ./common/install_cache.sh install_cache.sh
+ENV PATH /opt/cache/bin:$PATH
+RUN bash ./install_cache.sh && rm install_cache.sh
+
+# Include BUILD_ENVIRONMENT environment variable in image
+ARG BUILD_ENVIRONMENT
+ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
+
+# Install LLVM dev version (Defined in the pytorch/builder github repository)
+COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
+
+USER jenkins
+CMD ["bash"]
--- a/.ci/pytorch/build.sh
+++ b/.ci/pytorch/build.sh
@ -28,6 +28,8 @@ echo "Environment variables:"
 env

 if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
+  # Use jemalloc during compilation to mitigate https://github.com/pytorch/pytorch/issues/116289
+  export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
  echo "NVCC version:"
  nvcc --version
 fi
@ -151,6 +153,12 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  python tools/amd_build/build_amd.py
 fi

+if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
+  # shellcheck disable=SC1091
+  source /opt/intel/oneapi/compiler/latest/env/vars.sh
+  export USE_XPU=1
+fi
+
 # sccache will fail for CUDA builds if all cores are used for compiling
 # gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
 if [ -z "$MAX_JOBS" ]; then
--- a/.ci/pytorch/test.sh
+++ b/.ci/pytorch/test.sh
@ -18,6 +18,10 @@ BUILD_DIR="build"
 BUILD_RENAMED_DIR="build_renamed"
 BUILD_BIN_DIR="$BUILD_DIR"/bin

+#Set Default values for these variables in case they are not set
+SHARD_NUMBER="${SHARD_NUMBER:=1}"
+NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}"
+
 export VALGRIND=ON
 # export TORCH_INDUCTOR_INSTALL_GXX=ON
 if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
@ -124,6 +128,8 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  # mainly used so that we're not spending extra cycles testing cpu
  # devices on expensive gpu machines
  export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda"
+elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
+  export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
 fi

 if [[ "$TEST_CONFIG" == *crossref* ]]; then
@ -136,6 +142,15 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  rocminfo | grep -E 'Name:.*\sgfx|Marketing'
 fi

+if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
+  # Source Intel oneAPI environment script to enable xpu runtime related libraries
+  # refer to https://www.intel.com/content/www/us/en/docs/oneapi/programming-guide/2024-0/use-the-setvars-and-oneapi-vars-scripts-with-linux.html
+  # shellcheck disable=SC1091
+  source /opt/intel/oneapi/compiler/latest/env/vars.sh
+  # Check XPU status before testing
+  xpu-smi discovery
+fi
+
 if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then
  # JIT C++ extensions require ninja.
  pip_install --user "ninja==1.10.2"
@ -259,6 +274,7 @@ test_dynamo_shard() {
    --exclude-jit-executor \
    --exclude-distributed-tests \
    --exclude \
+      test_ao_sparsity \
      test_autograd \
      test_jit \
      test_proxy_tensor \
@ -308,8 +324,10 @@ test_inductor() {

  # docker build uses bdist_wheel which does not work with test_aot_inductor
  # TODO: need a faster way to build
-  BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
-  CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
+  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
+      BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
+      CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
+  fi
 }

 # "Global" flags for inductor benchmarking controlled by TEST_CONFIG
@ -389,8 +407,8 @@ test_perf_for_dashboard() {
            --output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_cuda_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
-        python "benchmarks/dynamo/$suite.py" \
-            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs --cpp-wrapper "$@" \
+        TORCHINDUCTOR_CPP_WRAPPER=1 python "benchmarks/dynamo/$suite.py" \
+            "${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
            --output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_cuda_${target}.csv"
      fi
      if [[ "$DASHBOARD_TAG" == *freezing_cudagraphs-true* ]] && [[ "$mode" == "inference" ]]; then
@ -491,6 +509,13 @@ test_inductor_torchbench_smoketest_perf() {
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

+  # smoke test the cpp_wrapper mode
+  TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy --bfloat16 \
+    --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_smoketest.csv"
+  python benchmarks/dynamo/check_accuracy.py \
+      --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_smoketest.csv" \
+      --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
+
  python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
    --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
    --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
@ -500,7 +525,11 @@ test_inductor_torchbench_smoketest_perf() {
  python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \
    --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv"
  # The threshold value needs to be actively maintained to make this check useful
-  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 5.2
+  # The perf number of nanogpt seems not very stable, e.g.
+  # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
+  # and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
+  # we switch to use some other model.
+  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9

  # Check memory compression ratio for a few models
  for test in hf_Albert timm_vision_transformer; do
@ -660,6 +689,20 @@ test_libtorch_api() {
  fi
 }

+test_xpu_bin(){
+  TEST_REPORTS_DIR=$(pwd)/test/test-reports
+  mkdir -p "$TEST_REPORTS_DIR"
+
+  for xpu_case in "${BUILD_BIN_DIR}"/*{xpu,sycl}*
+  do
+    if [[ "$xpu_case" != *"*"* ]]; then
+      case_name=$(basename "$xpu_case")
+      echo "Testing ${case_name} ..."
+      "$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
+    fi
+  done
+}
+
 test_aot_compilation() {
  echo "Testing Ahead of Time compilation"
  ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
@ -1069,7 +1112,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
  # https://github.com/opencv/opencv-python/issues/885
  pip_install opencv-python==4.8.0.74
  if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then
-    checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer
+    checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer
    PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf
  else
    checkout_install_torchbench
@ -1085,19 +1128,21 @@ elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 ]]; then
  test_inductor
  test_inductor_distributed
 elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
-  test_without_numpy
  install_torchvision
  test_dynamo_shard 1
  test_aten
-elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
+elif [[ "${TEST_CONFIG}" == *dynamo* && $SHARD_NUMBER -gt 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
  install_torchvision
-  test_dynamo_shard 2
+  test_dynamo_shard "${SHARD_NUMBER}"
 elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
  test_without_numpy
  install_torchvision
  test_python_shard 1
  test_aten
  test_libtorch 1
+  if [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
+    test_xpu_bin
+  fi
 elif [[ "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
  install_torchvision
  test_python_shard 2
@ -1122,6 +1167,11 @@ elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
  install_torchvision
  test_python
  test_aten
+elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
+  install_torchvision
+  test_python
+  test_aten
+  test_xpu_bin
 else
  install_torchvision
  install_monkeytype
--- a/.circleci/cimodel/data/binary_build_data.py
+++ b/.circleci/cimodel/data/binary_build_data.py
@ -1,198 +0,0 @@
-"""
-This module models the tree of configuration variants
-for "smoketest" builds.
-
-Each subclass of ConfigNode represents a layer of the configuration hierarchy.
-These tree nodes encapsulate the logic for whether a branch of the hierarchy
-should be "pruned".
-"""
-
-from collections import OrderedDict
-
-import cimodel.data.dimensions as dimensions
-
-from cimodel.lib.conf_tree import ConfigNode
-
-
-LINKING_DIMENSIONS = [
-    "shared",
-    "static",
-]
-
-
-DEPS_INCLUSION_DIMENSIONS = [
-    "with-deps",
-    "without-deps",
-]
-
-
-def get_processor_arch_name(gpu_version):
-    return (
-        "cpu"
-        if not gpu_version
-        else (
-            "cu" + gpu_version.strip("cuda")
-            if gpu_version.startswith("cuda")
-            else gpu_version
-        )
-    )
-
-
-CONFIG_TREE_DATA = OrderedDict()
-
-# GCC config variants:
-#
-# All the nightlies (except libtorch with new gcc ABI) are built with devtoolset7,
-# which can only build with old gcc ABI. It is better than devtoolset3
-# because it understands avx512, which is needed for good fbgemm performance.
-#
-# Libtorch with new gcc ABI is built with gcc 5.4 on Ubuntu 16.04.
-LINUX_GCC_CONFIG_VARIANTS = OrderedDict(
-    manywheel=["devtoolset7"],
-    conda=["devtoolset7"],
-    libtorch=[
-        "devtoolset7",
-        "gcc5.4_cxx11-abi",
-    ],
-)
-
-WINDOWS_LIBTORCH_CONFIG_VARIANTS = [
-    "debug",
-    "release",
-]
-
-
-class TopLevelNode(ConfigNode):
-    def __init__(self, node_name, config_tree_data, smoke):
-        super().__init__(None, node_name)
-
-        self.config_tree_data = config_tree_data
-        self.props["smoke"] = smoke
-
-    def get_children(self):
-        return [
-            OSConfigNode(self, x, c, p) for (x, (c, p)) in self.config_tree_data.items()
-        ]
-
-
-class OSConfigNode(ConfigNode):
-    def __init__(self, parent, os_name, gpu_versions, py_tree):
-        super().__init__(parent, os_name)
-
-        self.py_tree = py_tree
-        self.props["os_name"] = os_name
-        self.props["gpu_versions"] = gpu_versions
-
-    def get_children(self):
-        return [PackageFormatConfigNode(self, k, v) for k, v in self.py_tree.items()]
-
-
-class PackageFormatConfigNode(ConfigNode):
-    def __init__(self, parent, package_format, python_versions):
-        super().__init__(parent, package_format)
-
-        self.props["python_versions"] = python_versions
-        self.props["package_format"] = package_format
-
-    def get_children(self):
-        if self.find_prop("os_name") == "linux":
-            return [
-                LinuxGccConfigNode(self, v)
-                for v in LINUX_GCC_CONFIG_VARIANTS[self.find_prop("package_format")]
-            ]
-        elif (
-            self.find_prop("os_name") == "windows"
-            and self.find_prop("package_format") == "libtorch"
-        ):
-            return [
-                WindowsLibtorchConfigNode(self, v)
-                for v in WINDOWS_LIBTORCH_CONFIG_VARIANTS
-            ]
-        else:
-            return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
-
-
-class LinuxGccConfigNode(ConfigNode):
-    def __init__(self, parent, gcc_config_variant):
-        super().__init__(parent, "GCC_CONFIG_VARIANT=" + str(gcc_config_variant))
-
-        self.props["gcc_config_variant"] = gcc_config_variant
-
-    def get_children(self):
-        gpu_versions = self.find_prop("gpu_versions")
-
-        # XXX devtoolset7 on CUDA 9.0 is temporarily disabled
-        # see https://github.com/pytorch/pytorch/issues/20066
-        if self.find_prop("gcc_config_variant") == "devtoolset7":
-            gpu_versions = filter(lambda x: x != "cuda_90", gpu_versions)
-
-        # XXX disabling conda rocm build since docker images are not there
-        if self.find_prop("package_format") == "conda":
-            gpu_versions = filter(
-                lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions
-            )
-
-        # XXX libtorch rocm build  is temporarily disabled
-        if self.find_prop("package_format") == "libtorch":
-            gpu_versions = filter(
-                lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions
-            )
-
-        return [ArchConfigNode(self, v) for v in gpu_versions]
-
-
-class WindowsLibtorchConfigNode(ConfigNode):
-    def __init__(self, parent, libtorch_config_variant):
-        super().__init__(
-            parent, "LIBTORCH_CONFIG_VARIANT=" + str(libtorch_config_variant)
-        )
-
-        self.props["libtorch_config_variant"] = libtorch_config_variant
-
-    def get_children(self):
-        return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
-
-
-class ArchConfigNode(ConfigNode):
-    def __init__(self, parent, gpu):
-        super().__init__(parent, get_processor_arch_name(gpu))
-
-        self.props["gpu"] = gpu
-
-    def get_children(self):
-        return [PyVersionConfigNode(self, v) for v in self.find_prop("python_versions")]
-
-
-class PyVersionConfigNode(ConfigNode):
-    def __init__(self, parent, pyver):
-        super().__init__(parent, pyver)
-
-        self.props["pyver"] = pyver
-
-    def get_children(self):
-        package_format = self.find_prop("package_format")
-        os_name = self.find_prop("os_name")
-
-        has_libtorch_variants = package_format == "libtorch" and os_name == "linux"
-        linking_variants = LINKING_DIMENSIONS if has_libtorch_variants else []
-
-        return [LinkingVariantConfigNode(self, v) for v in linking_variants]
-
-
-class LinkingVariantConfigNode(ConfigNode):
-    def __init__(self, parent, linking_variant):
-        super().__init__(parent, linking_variant)
-
-    def get_children(self):
-        return [
-            DependencyInclusionConfigNode(self, v) for v in DEPS_INCLUSION_DIMENSIONS
-        ]
-
-
-class DependencyInclusionConfigNode(ConfigNode):
-    def __init__(self, parent, deps_variant):
-        super().__init__(parent, deps_variant)
-
-        self.props["libtorch_variant"] = "-".join(
-            [self.parent.get_label(), self.get_label()]
-        )
--- a/.circleci/cimodel/data/binary_build_definitions.py
+++ b/.circleci/cimodel/data/binary_build_definitions.py
@ -1,275 +0,0 @@
-from collections import OrderedDict
-
-import cimodel.data.binary_build_data as binary_build_data
-
-import cimodel.data.simple.util.branch_filters as branch_filters
-import cimodel.lib.conf_tree as conf_tree
-import cimodel.lib.miniutils as miniutils
-
-
-class Conf:
-    """One binary build/test configuration taken from the binary config tree.
-
-    Attributes:
-        os: operating system name (the code checks for "linux", "macos",
-            "macos_arm64" and "windows").
-        gpu_version: GPU label (e.g. one starting with "rocm"); falsy for CPU.
-        pydistro: package format (the code checks for "conda", "manywheel"
-            and "libtorch").
-        parms: list of extra build-environment words.
-        smoke: truthy for smoke-test jobs, falsy for regular binary jobs.
-        libtorch_variant: libtorch variant label, or falsy when absent.
-        gcc_config_variant: gcc/ABI variant, or None.
-        libtorch_config_variant: libtorch config variant, or None.
-    """
-
-    def __init__(
-        self,
-        os,
-        gpu_version,
-        pydistro,
-        parms,
-        smoke,
-        libtorch_variant,
-        gcc_config_variant,
-        libtorch_config_variant,
-    ):
-        self.os = os
-        self.gpu_version = gpu_version
-        self.pydistro = pydistro
-        self.parms = parms
-        self.smoke = smoke
-        self.libtorch_variant = libtorch_variant
-        self.gcc_config_variant = gcc_config_variant
-        self.libtorch_config_variant = libtorch_config_variant
-
-    def gen_build_env_parms(self):
-        """Return the ordered words that make up the build environment name."""
-        elems = (
-            [self.pydistro]
-            + self.parms
-            + [binary_build_data.get_processor_arch_name(self.gpu_version)]
-        )
-        if self.gcc_config_variant is not None:
-            elems.append(str(self.gcc_config_variant))
-        if self.libtorch_config_variant is not None:
-            elems.append(str(self.libtorch_config_variant))
-        return elems
-
-    def gen_docker_image(self):
-        """Return the quoted docker image name for this configuration."""
-        # Builder images are tagged "cpu" or with the GPU label.
-        builder_tag = "cpu" if self.gpu_version is None else self.gpu_version
-        if self.gcc_config_variant == "gcc5.4_cxx11-abi":
-            return miniutils.quote(f"pytorch/libtorch-cxx11-builder:{builder_tag}")
-        if self.pydistro == "conda":
-            return miniutils.quote(f"pytorch/conda-builder:{builder_tag}")
-
-        docker_word_substitution = {
-            "manywheel": "manylinux",
-            "libtorch": "manylinux",
-        }
-
-        docker_distro_prefix = miniutils.override(
-            self.pydistro, docker_word_substitution
-        )
-
-        # The cpu nightlies are built on the pytorch/manylinux-cuda102 docker image
-        # TODO cuda images should consolidate into tag-base images similar to rocm
-        if not self.gpu_version:
-            docker_distro_suffix = "cuda102"
-        elif self.gpu_version.startswith("rocm"):
-            # Drop the literal "rocm" prefix. str.strip("rocm") would strip any
-            # of the characters r/o/c/m from BOTH ends, which is not a prefix
-            # removal.
-            docker_distro_suffix = "rocm:" + self.gpu_version[len("rocm") :]
-        else:
-            docker_distro_suffix = self.gpu_version
-
-        # conda was handled by the early return above, so no conda-specific
-        # suffix rewriting is needed here.
-        return miniutils.quote(
-            "pytorch/" + docker_distro_prefix + "-" + docker_distro_suffix
-        )
-
-    def get_name_prefix(self):
-        """Return "smoke" for smoke-test configs and "binary" otherwise."""
-        return "smoke" if self.smoke else "binary"
-
-    def gen_build_name(self, build_or_test, nightly):
-        """Return the job name: words joined by "_" with every "." replaced by "_".
-
-        Args:
-            build_or_test: phase word appended for non-smoke configs.
-            nightly: when truthy, the word "nightly" is included.
-        """
-        parts = [self.get_name_prefix(), self.os] + self.gen_build_env_parms()
-
-        if nightly:
-            parts.append("nightly")
-
-        if self.libtorch_variant:
-            parts.append(self.libtorch_variant)
-
-        if not self.smoke:
-            parts.append(build_or_test)
-
-        joined = "_".join(parts)
-        return joined.replace(".", "_")
-
-    def gen_workflow_job(self, phase, upload_phase_dependency=None, nightly=False):
-        """Return ``{job_type: job_definition}`` for the given phase.
-
-        Args:
-            phase: phase name; "test" and "upload" get special handling.
-            upload_phase_dependency: unused; kept so existing callers still work.
-            nightly: forwarded to gen_build_name().
-        """
-        job_def = OrderedDict()
-        job_def["name"] = self.gen_build_name(phase, nightly)
-        job_def["build_environment"] = miniutils.quote(
-            " ".join(self.gen_build_env_parms())
-        )
-        if self.smoke:
-            job_def["requires"] = [
-                "update_s3_htmls",
-            ]
-            job_def["filters"] = branch_filters.gen_filter_dict(
-                branches_list=["postnightly"],
-            )
-        else:
-            filter_branch = r"/.*/"
-            job_def["filters"] = branch_filters.gen_filter_dict(
-                branches_list=[filter_branch],
-                tags_list=[branch_filters.RC_PATTERN],
-            )
-        if self.libtorch_variant:
-            job_def["libtorch_variant"] = miniutils.quote(self.libtorch_variant)
-
-        # Keys are added in a fixed order because job_def is an OrderedDict.
-        if phase == "test":
-            if not self.smoke:
-                job_def["requires"] = [self.gen_build_name("build", nightly)]
-            if not (self.smoke and self.os == "macos") and self.os != "windows":
-                job_def["docker_image"] = self.gen_docker_image()
-
-            # fix this. only works on cuda not rocm
-            if self.os != "windows" and self.gpu_version:
-                job_def["use_cuda_docker_runtime"] = miniutils.quote("1")
-
-            if self.gpu_version:
-                if self.os == "windows":
-                    job_def["executor"] = "windows-with-nvidia-gpu"
-                else:
-                    job_def["resource_class"] = "gpu.medium"
-        elif self.os == "linux" and phase != "upload":
-            job_def["docker_image"] = self.gen_docker_image()
-
-        os_name = miniutils.override(self.os, {"macos": "mac"})
-        job_name = "_".join([self.get_name_prefix(), os_name, phase])
-        return {job_name: job_def}
-
-    def gen_upload_job(self, phase, requires_dependency):
-        """Generate binary_upload job for configuration
-
-          Output looks similar to:
-
-        - binary_upload:
-            name: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_upload
-            context: org-member
-            requires: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_test
-            filters:
-              branches:
-                only:
-                  - nightly
-              tags:
-                only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
-            package_type: manywheel
-            upload_subfolder: cu113
-        """
-        return {
-            "binary_upload": OrderedDict(
-                {
-                    "name": self.gen_build_name(phase, nightly=True),
-                    "context": "org-member",
-                    "requires": [
-                        self.gen_build_name(requires_dependency, nightly=True)
-                    ],
-                    "filters": branch_filters.gen_filter_dict(
-                        branches_list=["nightly"],
-                        tags_list=[branch_filters.RC_PATTERN],
-                    ),
-                    "package_type": self.pydistro,
-                    "upload_subfolder": binary_build_data.get_processor_arch_name(
-                        self.gpu_version,
-                    ),
-                }
-            )
-        }
-
-
-def get_root(smoke, name):
-    """Build the root node of the binary config tree.
-
-    ``name`` and ``smoke`` are passed to TopLevelNode as its first and third
-    arguments, around the module-level CONFIG_TREE_DATA.
-    """
-    tree_data = binary_build_data.CONFIG_TREE_DATA
-    return binary_build_data.TopLevelNode(name, tree_data, smoke)
-
-
-def gen_build_env_list(smoke):
-    """Return one Conf per node yielded by a DFS over the binary config tree."""
-
-    def to_conf(node):
-        prop = node.find_prop
-        return Conf(
-            prop("os_name"),
-            prop("gpu"),
-            prop("package_format"),
-            [prop("pyver")],
-            # don't test arm64
-            prop("smoke") and not (prop("os_name") == "macos_arm64"),
-            prop("libtorch_variant"),
-            prop("gcc_config_variant"),
-            prop("libtorch_config_variant"),
-        )
-
-    tree_root = get_root(smoke, "N/A")
-    return [to_conf(node) for node in conf_tree.dfs(tree_root)]
-
-
-def predicate_exclude_macos(config):
-    """Return True only when ``config.os`` is "linux" or "windows"."""
-    return config.os in ("linux", "windows")
-
-
-def get_nightly_uploads():
-    """Return the upload job for every non-smoke configuration.
-
-    Linux and Windows uploads depend on the "test" phase; every other OS
-    depends on the "build" phase.
-    """
-    return [
-        conf.gen_upload_job(
-            "upload", "test" if predicate_exclude_macos(conf) else "build"
-        )
-        for conf in gen_build_env_list(False)
-    ]
-
-
-def get_post_upload_jobs():
-    """Return the single ``update_s3_htmls`` job, filtered to ``postnightly``."""
-    postnightly_only = branch_filters.gen_filter_dict(
-        branches_list=["postnightly"],
-    )
-    update_job = {
-        "name": "update_s3_htmls",
-        "context": "org-member",
-        "filters": postnightly_only,
-    }
-    return [{"update_s3_htmls": update_job}]
-
-
-def get_nightly_tests():
-    """Return nightly "test" jobs for the Linux and Windows configurations."""
-    return [
-        conf.gen_workflow_job("test", nightly=True)
-        for conf in gen_build_env_list(False)
-        if predicate_exclude_macos(conf)
-    ]
-
-
-def get_jobs(toplevel_key, smoke):
-    """Return nightly workflow jobs for every configuration.
-
-    The phase is "build" when ``toplevel_key`` is "binarybuilds" and "test"
-    otherwise.
-    """
-    phase = "build" if toplevel_key == "binarybuilds" else "test"
-
-    jobs = []
-    for conf in gen_build_env_list(smoke):
-        # don't test for macos_arm64 as it's cross compiled
-        if phase == "test" and conf.os == "macos_arm64":
-            continue
-        jobs.append(conf.gen_workflow_job(phase, nightly=True))
-    return jobs
-
-
-def get_binary_build_jobs():
-    """Return the build-phase jobs for the non-smoke configurations."""
-    return get_jobs(toplevel_key="binarybuilds", smoke=False)
-
-
-def get_binary_smoke_test_jobs():
-    """Return the test-phase jobs for the smoke configurations."""
-    return get_jobs(toplevel_key="binarysmoketests", smoke=True)
--- a/.circleci/cimodel/data/dimensions.py
+++ b/.circleci/cimodel/data/dimensions.py
@ -1,19 +0,0 @@
-# Default job phases.
-PHASES = ["build", "test"]
-
-# CUDA versions, written without the dot.
-CUDA_VERSIONS = ["102", "113", "116", "117"]
-
-ROCM_VERSIONS = ["4.3.1", "4.5.2"]
-
-# ROCm versions prefixed with "rocm", e.g. "rocm4.3.1".
-ROCM_VERSION_LABELS = [f"rocm{version}" for version in ROCM_VERSIONS]
-
-# None comes first, followed by the "cuda"-prefixed and "rocm"-prefixed labels.
-GPU_VERSIONS = [
-    None,
-    *(f"cuda{version}" for version in CUDA_VERSIONS),
-    *ROCM_VERSION_LABELS,
-]
-
-STANDARD_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
--- a/.circleci/cimodel/data/pytorch_build_data.py
+++ b/.circleci/cimodel/data/pytorch_build_data.py
@ -1,296 +0,0 @@
-from cimodel.lib.conf_tree import ConfigNode
-
-
-# Top-level data for the build config tree; it is empty here.
-# NOTE(review): presumably a list of (name, subtree) pairs, as consumed by
-# TreeConfigNode.get_children -- confirm against callers.
-CONFIG_TREE_DATA = []
-
-
-def get_major_pyver(dotted_version):
-    """Return "py" plus the text before the first "." (e.g. "3.9" -> "py3")."""
-    major, _, _ = dotted_version.partition(".")
-    return "py" + major
-
-
-class TreeConfigNode(ConfigNode):
-    """ConfigNode whose children are built from a ``subtree`` of (key, value) pairs.
-
-    Subclasses customise the label via modify_label(), record props in
-    init2(), and pick the child class via child_constructor().
-    """
-
-    def __init__(self, parent, node_name, subtree):
-        label = self.modify_label(node_name)
-        super().__init__(parent, label)
-        self.subtree = subtree
-        self.init2(node_name)
-
-    def modify_label(self, label):
-        """Return the display label; the default is the label unchanged."""
-        return label
-
-    def init2(self, node_name):
-        """Hook run at the end of __init__; the default does nothing."""
-        pass
-
-    def get_children(self):
-        """Instantiate one child per (key, value) pair of the subtree."""
-        children = []
-        for key, value in self.subtree:
-            children.append(self.child_constructor()(self, key, value))
-        return children
-
-
-class TopLevelNode(TreeConfigNode):
-    """Root of the tree: it has no parent and its children are DistroConfigNode."""
-
-    def __init__(self, node_name, subtree):
-        root_parent = None
-        super().__init__(root_parent, node_name, subtree)
-
-    # noinspection PyMethodMayBeStatic
-    def child_constructor(self):
-        return DistroConfigNode
-
-
-class DistroConfigNode(TreeConfigNode):
-    """Node for a distro; it stores ``distro_name`` and picks the compiler node."""
-
-    def init2(self, node_name):
-        self.props["distro_name"] = node_name
-
-    def child_constructor(self):
-        """Return the compiler node class for this distro (KeyError if unknown)."""
-        compiler_node_by_distro = {
-            "xenial": XenialCompilerConfigNode,
-            "bionic": BionicCompilerConfigNode,
-        }
-        return compiler_node_by_distro[self.find_prop("distro_name")]
-
-
-class PyVerConfigNode(TreeConfigNode):
-    """Node for a Python version; it stores ``pyver`` and ``abbreviated_pyver``."""
-
-    def init2(self, node_name):
-        major_only = get_major_pyver(node_name)
-        # 3.9 keeps its minor version in the abbreviated form.
-        abbreviated = "py3.9" if node_name == "3.9" else major_only
-
-        self.props["pyver"] = node_name
-        self.props["abbreviated_pyver"] = abbreviated
-
-    # noinspection PyMethodMayBeStatic
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
-
-class ExperimentalFeatureConfigNode(TreeConfigNode):
-    """Node naming a feature; its children use the node class for that feature."""
-
-    def init2(self, node_name):
-        self.props["experimental_feature"] = node_name
-
-    def child_constructor(self):
-        """Return the node class for this feature (KeyError if unknown)."""
-        # Built on every call because most of these classes are defined
-        # further down in the module.
-        node_class_by_feature = {
-            "asan": AsanConfigNode,
-            "xla": XlaConfigNode,
-            "mps": MPSConfigNode,
-            "vulkan": VulkanConfigNode,
-            "parallel_tbb": ParallelTBBConfigNode,
-            "crossref": CrossRefConfigNode,
-            "dynamo": DynamoConfigNode,
-            "parallel_native": ParallelNativeConfigNode,
-            "onnx": ONNXConfigNode,
-            "libtorch": LibTorchConfigNode,
-            "important": ImportantConfigNode,
-            "build_only": BuildOnlyConfigNode,
-            "shard_test": ShardTestConfigNode,
-            "cuda_gcc_override": CudaGccOverrideConfigNode,
-            "pure_torch": PureTorchConfigNode,
-            "slow_gradcheck": SlowGradcheckConfigNode,
-        }
-        return node_class_by_feature[self.find_prop("experimental_feature")]
-
-
-class SlowGradcheckConfigNode(TreeConfigNode):
-    """Sets ``is_slow_gradcheck`` to True; children name further features."""
-
-    def init2(self, node_name):
-        # The node name is ignored; the flag is always True.
-        self.props.update({"is_slow_gradcheck": True})
-
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
-
-class PureTorchConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_pure_torch``; labelled ``PURE_TORCH=<name>``."""
-
-    def modify_label(self, label):
-        return f"PURE_TORCH={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_pure_torch": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class XlaConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_xla``; labelled ``XLA=<name>``."""
-
-    def modify_label(self, label):
-        return f"XLA={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_xla": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class MPSConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_mps``; labelled ``MPS=<name>``."""
-
-    def modify_label(self, label):
-        return f"MPS={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_mps": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class AsanConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_asan``; labelled ``Asan=<name>``."""
-
-    def modify_label(self, label):
-        return f"Asan={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_asan": node_name})
-
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
-
-class ONNXConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_onnx``; labelled ``Onnx=<name>``."""
-
-    def modify_label(self, label):
-        return f"Onnx={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_onnx": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class VulkanConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_vulkan``; labelled ``Vulkan=<name>``."""
-
-    def modify_label(self, label):
-        return f"Vulkan={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_vulkan": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class ParallelTBBConfigNode(TreeConfigNode):
-    """Sets ``parallel_backend`` to "paralleltbb"; labelled ``PARALLELTBB=<name>``."""
-
-    def modify_label(self, label):
-        return f"PARALLELTBB={label}"
-
-    def init2(self, node_name):
-        # The node name only affects the label; the backend is fixed.
-        self.props.update({"parallel_backend": "paralleltbb"})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class CrossRefConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_crossref``; children are ImportantConfigNode."""
-
-    def init2(self, node_name):
-        self.props.update({"is_crossref": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class DynamoConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_dynamo``; children are ImportantConfigNode."""
-
-    def init2(self, node_name):
-        self.props.update({"is_dynamo": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class ParallelNativeConfigNode(TreeConfigNode):
-    """Sets ``parallel_backend`` to "parallelnative"; labelled ``PARALLELNATIVE=<name>``."""
-
-    def modify_label(self, label):
-        return f"PARALLELNATIVE={label}"
-
-    def init2(self, node_name):
-        # The node name only affects the label; the backend is fixed.
-        self.props.update({"parallel_backend": "parallelnative"})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class LibTorchConfigNode(TreeConfigNode):
-    """Stores the node name as ``is_libtorch``; labelled ``BUILD_TEST_LIBTORCH=<name>``."""
-
-    def modify_label(self, label):
-        return f"BUILD_TEST_LIBTORCH={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_libtorch": node_name})
-
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
-
-class CudaGccOverrideConfigNode(TreeConfigNode):
-    """Stores the node name as ``cuda_gcc_override``; children name further features."""
-
-    def init2(self, node_name):
-        self.props.update({"cuda_gcc_override": node_name})
-
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
-
-class BuildOnlyConfigNode(TreeConfigNode):
-    """Stores the node name as ``build_only``; children name further features."""
-
-    def init2(self, node_name):
-        self.props.update({"build_only": node_name})
-
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
-
-class ShardTestConfigNode(TreeConfigNode):
-    """Stores the node name as ``shard_test``; children are ImportantConfigNode."""
-
-    def init2(self, node_name):
-        self.props.update({"shard_test": node_name})
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
-class ImportantConfigNode(TreeConfigNode):
-    """Leaf node: stores the node name as ``is_important`` and has no children."""
-
-    def modify_label(self, label):
-        return f"IMPORTANT={label}"
-
-    def init2(self, node_name):
-        self.props.update({"is_important": node_name})
-
-    def get_children(self):
-        # Leaves ignore the subtree entirely.
-        return list()
-
-
-class XenialCompilerConfigNode(TreeConfigNode):
-    """Xenial compiler node; a falsy name is shown as "<unspecified>"."""
-
-    def modify_label(self, label):
-        return label if label else "<unspecified>"
-
-    def init2(self, node_name):
-        self.props["compiler_name"] = node_name
-
-    # noinspection PyMethodMayBeStatic
-    def child_constructor(self):
-        """Go to the compiler-version level only when a compiler is named."""
-        if self.props["compiler_name"]:
-            return XenialCompilerVersionConfigNode
-        return PyVerConfigNode
-
-
-class BionicCompilerConfigNode(TreeConfigNode):
-    """Bionic compiler node; a falsy name is shown as "<unspecified>"."""
-
-    def modify_label(self, label):
-        return label if label else "<unspecified>"
-
-    def init2(self, node_name):
-        self.props["compiler_name"] = node_name
-
-    # noinspection PyMethodMayBeStatic
-    def child_constructor(self):
-        """Go to the compiler-version level only when a compiler is named."""
-        if self.props["compiler_name"]:
-            return BionicCompilerVersionConfigNode
-        return PyVerConfigNode
-
-
-class XenialCompilerVersionConfigNode(TreeConfigNode):
-    """Stores the node name as ``compiler_version``; children are PyVerConfigNode."""
-
-    def init2(self, node_name):
-        self.props.update({"compiler_version": node_name})
-
-    # noinspection PyMethodMayBeStatic
-    def child_constructor(self):
-        return PyVerConfigNode
-
-
-class BionicCompilerVersionConfigNode(TreeConfigNode):
-    """Stores the node name as ``compiler_version``; children are PyVerConfigNode."""
-
-    def init2(self, node_name):
-        self.props.update({"compiler_version": node_name})
-
-    # noinspection PyMethodMayBeStatic
-    def child_constructor(self):
-        return PyVerConfigNode
--- a/.circleci/cimodel/data/pytorch_build_definitions.py
+++ b/.circleci/cimodel/data/pytorch_build_definitions.py
@ -1,382 +0,0 @@
-from collections import OrderedDict
-from dataclasses import dataclass, field
-from typing import List, Optional
-
-import cimodel.data.dimensions as dimensions
-import cimodel.lib.conf_tree as conf_tree
-import cimodel.lib.miniutils as miniutils
-from cimodel.data.pytorch_build_data import CONFIG_TREE_DATA, TopLevelNode
-from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
-from cimodel.data.simple.util.docker_constants import gen_docker_image
-
-
-@dataclass
-class Conf:
-    """One PyTorch Linux build/test configuration.
-
-    Field order matters: instantiate_configs() passes the first eleven fields
-    positionally.
-    """
-
-    distro: str
-    parms: List[str]
-    # Extra words used in job names but left out of the docker image name.
-    parms_list_ignored_for_docker_image: Optional[List[str]] = None
-    pyver: Optional[str] = None
-    cuda_version: Optional[str] = None
-    rocm_version: Optional[str] = None
-    # TODO expand this to cover all the USE_* that we want to test for
-    #  tensorrt, leveldb, lmdb, redis, opencv, mkldnn, ideep, etc.
-    # (from https://github.com/pytorch/pytorch/pull/17323#discussion_r259453608)
-    is_xla: bool = False
-    is_vulkan: bool = False
-    is_pure_torch: bool = False
-    # When set, replaces the default phase list in get_workflow_jobs().
-    restrict_phases: Optional[List[str]] = None
-    # Suffix of the "gpu.<suffix>" resource class used for test phases.
-    gpu_resource: Optional[str] = None
-    dependent_tests: List = field(default_factory=list)
-    # When set, docker image and build dependency come from this config.
-    parent_build: Optional["Conf"] = None
-    is_libtorch: bool = False
-    is_important: bool = False
-    parallel_backend: Optional[str] = None
-    build_only: bool = False
-
-    @staticmethod
-    def is_test_phase(phase):
-        """Return True when the phase name contains "test"."""
-        return "test" in phase
-
-    # TODO: Eliminate the special casing for docker paths
-    # In the short term, we *will* need to support special casing as docker images are merged for caffe2 and pytorch
-    def get_parms(self, for_docker):
-        """Return the ordered name words for this config.
-
-        Args:
-            for_docker: when truthy, feature words (xla, vulkan, libtorch,
-                pure_torch, parallel backend) and the docker-ignored parms are
-                left out, so the result names the docker image.
-        """
-        leading = []
-        # We just don't run non-important jobs on pull requests;
-        # previously we also named them in a way to make it obvious
-        # if self.is_important and not for_docker:
-        #    leading.append("AAA")
-        leading.append("pytorch")
-        if self.is_xla and not for_docker:
-            leading.append("xla")
-        if self.is_vulkan and not for_docker:
-            leading.append("vulkan")
-        if self.is_libtorch and not for_docker:
-            leading.append("libtorch")
-        if self.is_pure_torch and not for_docker:
-            leading.append("pure_torch")
-        if self.parallel_backend is not None and not for_docker:
-            leading.append(self.parallel_backend)
-
-        cuda_parms = []
-        if self.cuda_version:
-            cudnn = "cudnn8" if self.cuda_version.startswith("11.") else "cudnn7"
-            cuda_parms.extend(["cuda" + self.cuda_version, cudnn])
-        if self.rocm_version:
-            cuda_parms.append(f"rocm{self.rocm_version}")
-        result = leading + ["linux", self.distro] + cuda_parms + self.parms
-        if not for_docker and self.parms_list_ignored_for_docker_image is not None:
-            result = result + self.parms_list_ignored_for_docker_image
-        return result
-
-    def _docker_image_info(self):
-        """Return gen_docker_image()'s (image_name, requires) pair for this config."""
-        parms_source = self.parent_build or self
-        base_build_env_name = "-".join(parms_source.get_parms(True))
-        return gen_docker_image(base_build_env_name)
-
-    def gen_docker_image_path(self):
-        """Return the quoted docker image name."""
-        image_name, _ = self._docker_image_info()
-        return miniutils.quote(image_name)
-
-    def gen_docker_image_requires(self):
-        """Return the quoted name of the job this config's docker image requires."""
-        _, requires = self._docker_image_info()
-        return miniutils.quote(requires)
-
-    def get_build_job_name_pieces(self, build_or_test):
-        """Return the job-name words followed by the phase."""
-        return self.get_parms(False) + [build_or_test]
-
-    def gen_build_name(self, build_or_test):
-        """Return the job name: words joined by "_" with "." and "-" replaced by "_"."""
-        return (
-            ("_".join(map(str, self.get_build_job_name_pieces(build_or_test))))
-            .replace(".", "_")
-            .replace("-", "_")
-        )
-
-    def get_dependents(self):
-        """Return the dependent test configs, or an empty list."""
-        return self.dependent_tests or []
-
-    def gen_workflow_params(self, phase):
-        """Return the ordered job parameters for the given phase."""
-        parameters = OrderedDict()
-        build_job_name_pieces = self.get_build_job_name_pieces(phase)
-
-        build_env_name = "-".join(map(str, build_job_name_pieces))
-        parameters["build_environment"] = miniutils.quote(build_env_name)
-        parameters["docker_image"] = self.gen_docker_image_path()
-        if Conf.is_test_phase(phase) and self.gpu_resource:
-            parameters["use_cuda_docker_runtime"] = miniutils.quote("1")
-        if Conf.is_test_phase(phase):
-            resource_class = "large"
-            if self.gpu_resource:
-                resource_class = "gpu." + self.gpu_resource
-            # ROCm wins over any gpu.* class chosen above.
-            if self.rocm_version is not None:
-                resource_class = "pytorch/amd-gpu"
-            parameters["resource_class"] = resource_class
-        if phase == "build" and self.rocm_version is not None:
-            parameters["resource_class"] = "xlarge"
-        # "filters" is not a dataclass field; instantiate_configs() attaches it
-        # to selected instances only.
-        if hasattr(self, "filters"):
-            parameters["filters"] = self.filters
-        if self.build_only:
-            parameters["build_only"] = miniutils.quote(str(int(True)))
-        return parameters
-
-    def gen_workflow_job(self, phase):
-        """Return ``{job_type: job_definition}`` for the given phase."""
-        job_def = OrderedDict()
-        job_def["name"] = self.gen_build_name(phase)
-
-        if Conf.is_test_phase(phase):
-            # TODO When merging the caffe2 and pytorch jobs, it might be convenient for a while to make a
-            #  caffe2 test job dependent on a pytorch build job. This way we could quickly dedup the repeated
-            #  build of pytorch in the caffe2 build job, and just run the caffe2 tests off of a completed
-            #  pytorch build job (from https://github.com/pytorch/pytorch/pull/17323#discussion_r259452641)
-
-            dependency_build = self.parent_build or self
-            job_def["requires"] = [dependency_build.gen_build_name("build")]
-            job_name = "pytorch_linux_test"
-        else:
-            job_name = "pytorch_linux_build"
-            job_def["requires"] = [self.gen_docker_image_requires()]
-
-        if not self.is_important:
-            job_def["filters"] = gen_filter_dict()
-        # Workflow params are applied last, so a per-instance "filters"
-        # overrides the default set just above.
-        job_def.update(self.gen_workflow_params(phase))
-
-        return {job_name: job_def}
-
-
-# TODO This is a hack to special case some configs just for the workflow list
-class HiddenConf:
-    """Minimal config whose job is keyed by a fixed name.
-
-    The job requires the parent's "build" job and carries the given filters.
-    """
-
-    def __init__(self, name, parent_build=None, filters=None):
-        self.name = name
-        self.parent_build = parent_build
-        self.filters = filters
-
-    def gen_workflow_job(self, phase):
-        job_key = self.gen_build_name(phase)
-        job_def = {
-            "requires": [self.parent_build.gen_build_name("build")],
-            "filters": self.filters,
-        }
-        return {job_key: job_def}
-
-    def gen_build_name(self, _):
-        """Return the fixed name; the phase argument is ignored."""
-        return self.name
-
-
-class DocPushConf:
-    """Config for a ``pytorch_doc_push`` job.
-
-    Here ``parent_build`` is used as-is in ``requires``, so it is a job name
-    rather than a config object.
-    """
-
-    def __init__(self, name, parent_build=None, branch="master"):
-        self.name = name
-        self.parent_build = parent_build
-        self.branch = branch
-
-    def gen_workflow_job(self, phase):
-        """Return the doc-push job; ``phase`` is not used."""
-        nightly_and_rc = gen_filter_dict(
-            branches_list=["nightly"], tags_list=RC_PATTERN
-        )
-        job_def = {
-            "name": self.name,
-            "branch": self.branch,
-            "requires": [self.parent_build],
-            "context": "org-member",
-            "filters": nightly_and_rc,
-        }
-        return {"pytorch_doc_push": job_def}
-
-
-def gen_docs_configs(xenial_parent_config):
-    """Return the doc build and push configs that hang off the given build.
-
-    Order: python build, python push, cpp build, cpp push.
-    """
-
-    def doc_build(job_name):
-        # Each build config gets its own freshly generated filter dict.
-        return HiddenConf(
-            job_name,
-            parent_build=xenial_parent_config,
-            filters=gen_filter_dict(
-                branches_list=["master", "main", "nightly"], tags_list=RC_PATTERN
-            ),
-        )
-
-    def doc_push(job_name, build_job_name, target_branch):
-        return DocPushConf(
-            job_name,
-            parent_build=build_job_name,
-            branch=target_branch,
-        )
-
-    return [
-        doc_build("pytorch_python_doc_build"),
-        doc_push("pytorch_python_doc_push", "pytorch_python_doc_build", "site"),
-        doc_build("pytorch_cpp_doc_build"),
-        doc_push("pytorch_cpp_doc_push", "pytorch_cpp_doc_build", "master"),
-    ]
-
-
-def get_root():
-    """Build the root node of the PyTorch build config tree."""
-    root_label = "PyTorch Builds"
-    return TopLevelNode(root_label, CONFIG_TREE_DATA)
-
-
-def gen_tree():
-    """Return the result of a DFS over the config tree."""
-    return conf_tree.dfs(get_root())
-
-
-def instantiate_configs(only_slow_gradcheck):
-    """Turn every node found by a DFS over the config tree into a Conf.
-
-    Args:
-        only_slow_gradcheck: when truthy, keep only slow-gradcheck configs;
-            when falsy, keep only the others.
-
-    Returns:
-        List of Conf objects in traversal order.
-    """
-    config_list = []
-
-    root = get_root()
-    found_configs = conf_tree.dfs(root)
-    for fc in found_configs:
-        restrict_phases = None
-        distro_name = fc.find_prop("distro_name")
-        compiler_name = fc.find_prop("compiler_name")
-        compiler_version = fc.find_prop("compiler_version")
-        is_xla = fc.find_prop("is_xla") or False
-        is_asan = fc.find_prop("is_asan") or False
-        is_crossref = fc.find_prop("is_crossref") or False
-        is_dynamo = fc.find_prop("is_dynamo") or False
-        is_onnx = fc.find_prop("is_onnx") or False
-        is_pure_torch = fc.find_prop("is_pure_torch") or False
-        is_vulkan = fc.find_prop("is_vulkan") or False
-        is_slow_gradcheck = fc.find_prop("is_slow_gradcheck") or False
-        parms_list_ignored_for_docker_image = []
-
-        # XOR: skip the config when the two flags disagree.
-        if only_slow_gradcheck ^ is_slow_gradcheck:
-            continue
-
-        # parms_list[0] is always the python word; later branches may overwrite it.
-        python_version = None
-        if compiler_name == "cuda" or compiler_name == "android":
-            python_version = fc.find_prop("pyver")
-            parms_list = [fc.find_prop("abbreviated_pyver")]
-        else:
-            parms_list = ["py" + fc.find_prop("pyver")]
-
-        cuda_version = None
-        rocm_version = None
-        if compiler_name == "cuda":
-            cuda_version = fc.find_prop("compiler_version")
-
-        elif compiler_name == "rocm":
-            rocm_version = fc.find_prop("compiler_version")
-            restrict_phases = ["build", "test1", "test2", "caffe2_test"]
-
-        elif compiler_name == "android":
-            android_ndk_version = fc.find_prop("compiler_version")
-            # TODO: do we need clang to compile host binaries like protoc?
-            parms_list.append("clang5")
-            parms_list.append("android-ndk-" + android_ndk_version)
-            android_abi = fc.find_prop("android_abi")
-            parms_list_ignored_for_docker_image.append(android_abi)
-            restrict_phases = ["build"]
-
-        elif compiler_name:
-            # Any other named compiler: name and version are concatenated.
-            gcc_version = compiler_name + (fc.find_prop("compiler_version") or "")
-            parms_list.append(gcc_version)
-
-        if is_asan:
-            parms_list.append("asan")
-            python_version = fc.find_prop("pyver")
-            parms_list[0] = fc.find_prop("abbreviated_pyver")
-
-        if is_crossref:
-            parms_list_ignored_for_docker_image.append("crossref")
-
-        if is_dynamo:
-            parms_list_ignored_for_docker_image.append("dynamo")
-
-        if is_onnx:
-            parms_list.append("onnx")
-            python_version = fc.find_prop("pyver")
-            parms_list[0] = fc.find_prop("abbreviated_pyver")
-            restrict_phases = ["build", "ort_test1", "ort_test2"]
-
-        if cuda_version:
-            # CUDA builds default to gcc7 unless the tree overrides it.
-            cuda_gcc_version = fc.find_prop("cuda_gcc_override") or "gcc7"
-            parms_list.append(cuda_gcc_version)
-
-        is_libtorch = fc.find_prop("is_libtorch") or False
-        is_important = fc.find_prop("is_important") or False
-        parallel_backend = fc.find_prop("parallel_backend") or None
-        build_only = fc.find_prop("build_only") or False
-        shard_test = fc.find_prop("shard_test") or False
-        # TODO: fix pure_torch python test packaging issue.
-        if shard_test:
-            # Appends to any phase list set above.
-            # NOTE(review): for rocm this repeats "test1"/"test2" -- confirm intended.
-            restrict_phases = ["build"] if restrict_phases is None else restrict_phases
-            restrict_phases.extend(["test1", "test2"])
-        # build_only / pure_torch override every phase list chosen earlier.
-        if build_only or is_pure_torch:
-            restrict_phases = ["build"]
-
-        if is_slow_gradcheck:
-            parms_list_ignored_for_docker_image.append("old")
-            parms_list_ignored_for_docker_image.append("gradcheck")
-
-        gpu_resource = None
-        if cuda_version and cuda_version != "10":
-            gpu_resource = "medium"
-
-        # The first eleven arguments are positional and must follow Conf's field order.
-        c = Conf(
-            distro_name,
-            parms_list,
-            parms_list_ignored_for_docker_image,
-            python_version,
-            cuda_version,
-            rocm_version,
-            is_xla,
-            is_vulkan,
-            is_pure_torch,
-            restrict_phases,
-            gpu_resource,
-            is_libtorch=is_libtorch,
-            is_important=is_important,
-            parallel_backend=parallel_backend,
-            build_only=build_only,
-        )
-
-        # run docs builds on "pytorch-linux-xenial-py3.7-gcc5.4". Docs builds
-        # should run on a CPU-only build that runs on all PRs.
-        # XXX should this be updated to a more modern build?
-        if (
-            distro_name == "xenial"
-            and fc.find_prop("pyver") == "3.7"
-            and cuda_version is None
-            and parallel_backend is None
-            and not is_vulkan
-            and not is_pure_torch
-            and compiler_name == "gcc"
-            and fc.find_prop("compiler_version") == "5.4"
-        ):
-            # "filters" is attached dynamically; it is not a declared Conf field.
-            c.filters = gen_filter_dict(branches_list=r"/.*/", tags_list=RC_PATTERN)
-            c.dependent_tests = gen_docs_configs(c)
-
-        config_list.append(c)
-
-    return config_list
-
-
-def get_workflow_jobs(only_slow_gradcheck=False):
-    """Return the workflow jobs for every config, each followed by its dependents.
-
-    A config contributes one job per phase (its ``restrict_phases`` or the
-    default PHASES), then one "test" job per dependent config.
-    """
-    jobs = []
-    for conf in instantiate_configs(only_slow_gradcheck):
-        for phase in conf.restrict_phases or dimensions.PHASES:
-            # TODO why does this not have a test?
-            skip_phase = Conf.is_test_phase(phase) and conf.cuda_version == "10"
-            if not skip_phase:
-                jobs.append(conf.gen_workflow_job(phase))
-
-        # TODO convert to recursion
-        jobs.extend(
-            dependent.gen_workflow_job("test") for dependent in conf.get_dependents()
-        )
-
-    return jobs
--- a/.circleci/cimodel/data/simple/init.py
+++ b/.circleci/cimodel/data/simple/init.py
--- a/.circleci/cimodel/data/simple/docker_definitions.py
+++ b/.circleci/cimodel/data/simple/docker_definitions.py
@ -1,39 +0,0 @@
-from collections import OrderedDict
-
-from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
-
-from cimodel.lib.miniutils import quote
-
-
-# NOTE: All hardcoded docker image builds have been migrated to GHA
-IMAGE_NAMES = []
-
-# This entry should be an element from the list above
-# This should contain the image matching the "slow_gradcheck" entry in
-# pytorch_build_data.py
-SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-
-
-def get_workflow_jobs(images=IMAGE_NAMES, only_slow_gradcheck=False):
-    """Generates a list of docker image build definitions"""
-    ret = []
-    for image_name in images:
-        if image_name.startswith("docker-"):
-            image_name = image_name.lstrip("docker-")
-        if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
-            continue
-
-        parameters = OrderedDict(
-            {
-                "name": quote(f"docker-{image_name}"),
-                "image_name": quote(image_name),
-            }
-        )
-        if image_name == "pytorch-linux-xenial-py3.7-gcc5.4":
-            # pushing documentation on tags requires CircleCI to also
-            # build all the dependencies on tags, including this docker image
-            parameters["filters"] = gen_filter_dict(
-                branches_list=r"/.*/", tags_list=RC_PATTERN
-            )
-        ret.append(OrderedDict({"docker_build_job": parameters}))
-    return ret
--- a/.circleci/cimodel/data/simple/ios_definitions.py
+++ b/.circleci/cimodel/data/simple/ios_definitions.py
@ -1,100 +0,0 @@
-import cimodel.lib.miniutils as miniutils
-from cimodel.data.simple.util.branch_filters import gen_filter_dict_exclude
-from cimodel.data.simple.util.versions import MultiPartVersion
-
-XCODE_VERSION = MultiPartVersion([12, 5, 1])
-
-
-class ArchVariant:
-    def __init__(self, name, custom_build_name=""):
-        self.name = name
-        self.custom_build_name = custom_build_name
-
-    def render(self):
-        extra_parts = (
-            [self.custom_build_name] if len(self.custom_build_name) > 0 else []
-        )
-        return "-".join([self.name] + extra_parts).replace("_", "-")
-
-
-def get_platform(arch_variant_name):
-    return "SIMULATOR" if arch_variant_name == "x86_64" else "OS"
-
-
-class IOSJob:
-    def __init__(
-        self, xcode_version, arch_variant, is_org_member_context=True, extra_props=None
-    ):
-        self.xcode_version = xcode_version
-        self.arch_variant = arch_variant
-        self.is_org_member_context = is_org_member_context
-        self.extra_props = extra_props
-
-    def gen_name_parts(self):
-        version_parts = self.xcode_version.render_dots_or_parts("-")
-        build_variant_suffix = self.arch_variant.render()
-        return (
-            [
-                "ios",
-            ]
-            + version_parts
-            + [
-                build_variant_suffix,
-            ]
-        )
-
-    def gen_job_name(self):
-        return "-".join(self.gen_name_parts())
-
-    def gen_tree(self):
-        platform_name = get_platform(self.arch_variant.name)
-        props_dict = {
-            "name": self.gen_job_name(),
-            "build_environment": self.gen_job_name(),
-            "ios_arch": self.arch_variant.name,
-            "ios_platform": platform_name,
-        }
-
-        if self.is_org_member_context:
-            props_dict["context"] = "org-member"
-
-        if self.extra_props:
-            props_dict.update(self.extra_props)
-
-        props_dict["filters"] = gen_filter_dict_exclude()
-
-        return [{"pytorch_ios_build": props_dict}]
-
-
-WORKFLOW_DATA = [
-    IOSJob(
-        XCODE_VERSION,
-        ArchVariant("x86_64"),
-        is_org_member_context=False,
-        extra_props={"lite_interpreter": miniutils.quote(str(int(True)))},
-    ),
-    # IOSJob(XCODE_VERSION, ArchVariant("arm64"), extra_props={
-    #     "lite_interpreter": miniutils.quote(str(int(True)))}),
-    # IOSJob(XCODE_VERSION, ArchVariant("arm64", "metal"), extra_props={
-    #     "use_metal": miniutils.quote(str(int(True))),
-    #     "lite_interpreter": miniutils.quote(str(int(True)))}),
-    # IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom-ops"), extra_props={
-    #     "op_list": "mobilenetv2.yaml",
-    #     "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(
-        XCODE_VERSION,
-        ArchVariant("x86_64", "coreml"),
-        is_org_member_context=False,
-        extra_props={
-            "use_coreml": miniutils.quote(str(int(True))),
-            "lite_interpreter": miniutils.quote(str(int(True))),
-        },
-    ),
-    # IOSJob(XCODE_VERSION, ArchVariant("arm64", "coreml"), extra_props={
-    #     "use_coreml": miniutils.quote(str(int(True))),
-    #     "lite_interpreter": miniutils.quote(str(int(True)))}),
-]
-
-
-def get_workflow_jobs():
-    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/macos_definitions.py
+++ b/.circleci/cimodel/data/simple/macos_definitions.py
@ -1,54 +0,0 @@
-class MacOsJob:
-    def __init__(self, os_version, is_build=False, is_test=False, extra_props=tuple()):
-        # extra_props is tuple type, because mutable data structures for argument defaults
-        # is not recommended.
-        self.os_version = os_version
-        self.is_build = is_build
-        self.is_test = is_test
-        self.extra_props = dict(extra_props)
-
-    def gen_tree(self):
-        non_phase_parts = ["pytorch", "macos", self.os_version, "py3"]
-
-        extra_name_list = [name for name, exist in self.extra_props.items() if exist]
-        full_job_name_list = (
-            non_phase_parts
-            + extra_name_list
-            + [
-                "build" if self.is_build else None,
-                "test" if self.is_test else None,
-            ]
-        )
-
-        full_job_name = "_".join(list(filter(None, full_job_name_list)))
-
-        test_build_dependency = "_".join(non_phase_parts + ["build"])
-        extra_dependencies = [test_build_dependency] if self.is_test else []
-        job_dependencies = extra_dependencies
-
-        # Yes we name the job after itself, it needs a non-empty value in here
-        # for the YAML output to work.
-        props_dict = {"requires": job_dependencies, "name": full_job_name}
-
-        return [{full_job_name: props_dict}]
-
-
-WORKFLOW_DATA = [
-    MacOsJob("10_15", is_build=True),
-    MacOsJob("10_13", is_build=True),
-    MacOsJob(
-        "10_13",
-        is_build=False,
-        is_test=True,
-    ),
-    MacOsJob(
-        "10_13",
-        is_build=True,
-        is_test=True,
-        extra_props=tuple({"lite_interpreter": True}.items()),
-    ),
-]
-
-
-def get_workflow_jobs():
-    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/mobile_definitions.py
+++ b/.circleci/cimodel/data/simple/mobile_definitions.py
@ -1,51 +0,0 @@
-"""
-PyTorch Mobile PR builds (use linux host toolchain + mobile build options)
-"""
-
-import cimodel.data.simple.util.branch_filters
-import cimodel.lib.miniutils as miniutils
-
-
-class MobileJob:
-    def __init__(
-        self, docker_image, docker_requires, variant_parts, is_master_only=False
-    ):
-        self.docker_image = docker_image
-        self.docker_requires = docker_requires
-        self.variant_parts = variant_parts
-        self.is_master_only = is_master_only
-
-    def gen_tree(self):
-        non_phase_parts = [
-            "pytorch",
-            "linux",
-            "xenial",
-            "py3",
-            "clang5",
-            "mobile",
-        ] + self.variant_parts
-
-        full_job_name = "_".join(non_phase_parts)
-        build_env_name = "-".join(non_phase_parts)
-
-        props_dict = {
-            "build_environment": build_env_name,
-            "build_only": miniutils.quote(str(int(True))),
-            "docker_image": self.docker_image,
-            "requires": self.docker_requires,
-            "name": full_job_name,
-        }
-
-        if self.is_master_only:
-            props_dict[
-                "filters"
-            ] = cimodel.data.simple.util.branch_filters.gen_filter_dict()
-
-        return [{"pytorch_linux_build": props_dict}]
-
-
-WORKFLOW_DATA = []
-
-
-def get_workflow_jobs():
-    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/nightly_ios.py
+++ b/.circleci/cimodel/data/simple/nightly_ios.py
@ -1,96 +0,0 @@
-import cimodel.data.simple.ios_definitions as ios_definitions
-import cimodel.lib.miniutils as miniutils
-
-
-class IOSNightlyJob:
-    def __init__(self, variant, is_full_jit=False, is_upload=False):
-        self.variant = variant
-        self.is_full_jit = is_full_jit
-        self.is_upload = is_upload
-
-    def get_phase_name(self):
-        return "upload" if self.is_upload else "build"
-
-    def get_common_name_pieces(self, sep):
-        extra_name_suffix = [self.get_phase_name()] if self.is_upload else []
-
-        extra_name = ["full_jit"] if self.is_full_jit else []
-
-        common_name_pieces = (
-            [
-                "ios",
-            ]
-            + extra_name
-            + []
-            + ios_definitions.XCODE_VERSION.render_dots_or_parts(sep)
-            + [
-                "nightly",
-                self.variant,
-                "build",
-            ]
-            + extra_name_suffix
-        )
-
-        return common_name_pieces
-
-    def gen_job_name(self):
-        return "_".join(["pytorch"] + self.get_common_name_pieces(None))
-
-    def gen_tree(self):
-        build_configs = BUILD_CONFIGS_FULL_JIT if self.is_full_jit else BUILD_CONFIGS
-        extra_requires = (
-            [x.gen_job_name() for x in build_configs] if self.is_upload else []
-        )
-
-        props_dict = {
-            "build_environment": "-".join(
-                ["libtorch"] + self.get_common_name_pieces(".")
-            ),
-            "requires": extra_requires,
-            "context": "org-member",
-            "filters": {"branches": {"only": "nightly"}},
-        }
-
-        if not self.is_upload:
-            props_dict["ios_arch"] = self.variant
-            props_dict["ios_platform"] = ios_definitions.get_platform(self.variant)
-            props_dict["name"] = self.gen_job_name()
-            props_dict["use_metal"] = miniutils.quote(str(int(True)))
-            props_dict["use_coreml"] = miniutils.quote(str(int(True)))
-
-        if self.is_full_jit:
-            props_dict["lite_interpreter"] = miniutils.quote(str(int(False)))
-
-        template_name = "_".join(
-            [
-                "binary",
-                "ios",
-                self.get_phase_name(),
-            ]
-        )
-
-        return [{template_name: props_dict}]
-
-
-BUILD_CONFIGS = [
-    IOSNightlyJob("x86_64"),
-    IOSNightlyJob("arm64"),
-]
-
-BUILD_CONFIGS_FULL_JIT = [
-    IOSNightlyJob("x86_64", is_full_jit=True),
-    IOSNightlyJob("arm64", is_full_jit=True),
-]
-
-WORKFLOW_DATA = (
-    BUILD_CONFIGS
-    + BUILD_CONFIGS_FULL_JIT
-    + [
-        IOSNightlyJob("binary", is_full_jit=False, is_upload=True),
-        IOSNightlyJob("binary", is_full_jit=True, is_upload=True),
-    ]
-)
-
-
-def get_workflow_jobs():
-    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/util/init.py
+++ b/.circleci/cimodel/data/simple/util/init.py
--- a/.circleci/cimodel/data/simple/util/branch_filters.py
+++ b/.circleci/cimodel/data/simple/util/branch_filters.py
@ -1,36 +0,0 @@
-NON_PR_BRANCH_LIST = [
-    "main",
-    "master",
-    r"/ci-all\/.*/",
-    r"/release\/.*/",
-]
-
-PR_BRANCH_LIST = [
-    r"/gh\/.*\/head/",
-    r"/pull\/.*/",
-]
-
-RC_PATTERN = r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"
-
-MAC_IOS_EXCLUSION_LIST = ["nightly", "postnightly"]
-
-
-def gen_filter_dict(branches_list=NON_PR_BRANCH_LIST, tags_list=None):
-    """Generates a filter dictionary for use with CircleCI's job filter"""
-    filter_dict = {
-        "branches": {
-            "only": branches_list,
-        },
-    }
-
-    if tags_list is not None:
-        filter_dict["tags"] = {"only": tags_list}
-    return filter_dict
-
-
-def gen_filter_dict_exclude(branches_list=MAC_IOS_EXCLUSION_LIST):
-    return {
-        "branches": {
-            "ignore": branches_list,
-        },
-    }
--- a/.circleci/cimodel/data/simple/util/docker_constants.py
+++ b/.circleci/cimodel/data/simple/util/docker_constants.py
@ -1,35 +0,0 @@
-AWS_DOCKER_HOST = "308535385114.dkr.ecr.us-east-1.amazonaws.com"
-
-
-def gen_docker_image(container_type):
-    return (
-        "/".join([AWS_DOCKER_HOST, "pytorch", container_type]),
-        f"docker-{container_type}",
-    )
-
-
-def gen_docker_image_requires(image_name):
-    return [f"docker-{image_name}"]
-
-
-DOCKER_IMAGE_BASIC, DOCKER_REQUIREMENT_BASE = gen_docker_image(
-    "pytorch-linux-xenial-py3.7-gcc5.4"
-)
-
-DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
-    "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-)
-
-DOCKER_IMAGE_GCC7, DOCKER_REQUIREMENT_GCC7 = gen_docker_image(
-    "pytorch-linux-xenial-py3.7-gcc7"
-)
-
-
-def gen_mobile_docker(specifier):
-    container_type = "pytorch-linux-xenial-py3-clang5-" + specifier
-    return gen_docker_image(container_type)
-
-
-DOCKER_IMAGE_ASAN, DOCKER_REQUIREMENT_ASAN = gen_mobile_docker("asan")
-
-DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK = gen_mobile_docker("android-ndk-r21e")
--- a/.circleci/cimodel/data/simple/util/versions.py
+++ b/.circleci/cimodel/data/simple/util/versions.py
@ -1,36 +0,0 @@
-from typing import Optional
-
-
-class MultiPartVersion:
-    def __init__(self, parts, prefix=""):
-        self.parts = parts
-        self.prefix = prefix
-
-    def prefixed_parts(self):
-        """
-        Prepends the first element of the version list
-        with the prefix string.
-        """
-        if self.parts:
-            return [self.prefix + str(self.parts[0])] + [
-                str(part) for part in self.parts[1:]
-            ]
-        else:
-            return [self.prefix]
-
-    def render_dots_or_parts(self, sep: Optional[str] = None):
-        if sep is None:
-            return self.prefixed_parts()
-        else:
-            return [sep.join(self.prefixed_parts())]
-
-
-class CudaVersion(MultiPartVersion):
-    def __init__(self, major, minor):
-        self.major = major
-        self.minor = minor
-
-        super().__init__([self.major, self.minor], "cuda")
-
-    def __str__(self):
-        return f"{self.major}.{self.minor}"
--- a/.circleci/cimodel/lib/init.py
+++ b/.circleci/cimodel/lib/init.py
--- a/.circleci/cimodel/lib/conf_tree.py
+++ b/.circleci/cimodel/lib/conf_tree.py
@ -1,111 +0,0 @@
-from dataclasses import dataclass, field
-from typing import Dict, Optional
-
-
-def X(val):
-    """
-    Compact way to write a leaf node
-    """
-    return val, []
-
-
-def XImportant(name):
-    """Compact way to write an important (run on PRs) leaf node"""
-    return (name, [("important", [X(True)])])
-
-
-@dataclass
-class Ver:
-    """
-    Represents a product with a version number
-    """
-
-    name: str
-    version: str = ""
-
-    def __str__(self):
-        return self.name + self.version
-
-
-@dataclass
-class ConfigNode:
-    parent: Optional["ConfigNode"]
-    node_name: str
-    props: Dict[str, str] = field(default_factory=dict)
-
-    def get_label(self):
-        return self.node_name
-
-    # noinspection PyMethodMayBeStatic
-    def get_children(self):
-        return []
-
-    def get_parents(self):
-        return (
-            (self.parent.get_parents() + [self.parent.get_label()])
-            if self.parent
-            else []
-        )
-
-    def get_depth(self):
-        return len(self.get_parents())
-
-    def get_node_key(self):
-        return "%".join(self.get_parents() + [self.get_label()])
-
-    def find_prop(self, propname, searched=None):
-        """
-        Checks if its own dictionary has
-        the property, otherwise asks parent node.
-        """
-
-        if searched is None:
-            searched = []
-
-        searched.append(self.node_name)
-
-        if propname in self.props:
-            return self.props[propname]
-        elif self.parent:
-            return self.parent.find_prop(propname, searched)
-        else:
-            # raise Exception('Property "%s" does not exist anywhere in the tree! Searched: %s' % (propname, searched))
-            return None
-
-
-def dfs_recurse(
-    node,
-    leaf_callback=lambda x: None,
-    discovery_callback=lambda x, y, z: None,
-    child_callback=lambda x, y: None,
-    sibling_index=0,
-    sibling_count=1,
-):
-    discovery_callback(node, sibling_index, sibling_count)
-
-    node_children = node.get_children()
-    if node_children:
-        for i, child in enumerate(node_children):
-            child_callback(node, child)
-
-            dfs_recurse(
-                child,
-                leaf_callback,
-                discovery_callback,
-                child_callback,
-                i,
-                len(node_children),
-            )
-    else:
-        leaf_callback(node)
-
-
-def dfs(toplevel_config_node):
-    config_list = []
-
-    def leaf_callback(node):
-        config_list.append(node)
-
-    dfs_recurse(toplevel_config_node, leaf_callback)
-
-    return config_list
--- a/.circleci/cimodel/lib/miniutils.py
+++ b/.circleci/cimodel/lib/miniutils.py
@ -1,10 +0,0 @@
-def quote(s):
-    return sandwich('"', s)
-
-
-def sandwich(bread, jam):
-    return bread + jam + bread
-
-
-def override(word, substitutions):
-    return substitutions.get(word, word)
--- a/.circleci/cimodel/lib/miniyaml.py
+++ b/.circleci/cimodel/lib/miniyaml.py
@ -1,51 +0,0 @@
-from collections import OrderedDict
-
-import cimodel.lib.miniutils as miniutils
-
-
-LIST_MARKER = "- "
-INDENTATION_WIDTH = 2
-
-
-def is_dict(data):
-    return type(data) in [dict, OrderedDict]
-
-
-def is_collection(data):
-    return is_dict(data) or type(data) is list
-
-
-def render(fh, data, depth, is_list_member=False):
-    """
-    PyYaml does not allow precise control over the quoting
-    behavior, especially for merge references.
-    Therefore, we use this custom YAML renderer.
-    """
-
-    indentation = " " * INDENTATION_WIDTH * depth
-
-    if is_dict(data):
-        tuples = list(data.items())
-        if type(data) is not OrderedDict:
-            tuples.sort()
-
-        for i, (k, v) in enumerate(tuples):
-            if not v:
-                continue
-            # If this dict is itself a list member, the first key gets prefixed with a list marker
-            list_marker_prefix = LIST_MARKER if is_list_member and not i else ""
-
-            trailing_whitespace = "\n" if is_collection(v) else " "
-            fh.write(indentation + list_marker_prefix + k + ":" + trailing_whitespace)
-
-            render(fh, v, depth + 1 + int(is_list_member))
-
-    elif type(data) is list:
-        for v in data:
-            render(fh, v, depth, True)
-
-    else:
-        # use empty quotes to denote an empty string value instead of blank space
-        modified_data = miniutils.quote(data) if data == "" else data
-        list_member_prefix = indentation + LIST_MARKER if is_list_member else ""
-        fh.write(list_member_prefix + str(modified_data) + "\n")
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
--- a/.circleci/ensure-consistency.py
+++ b/.circleci/ensure-consistency.py
@ -1,41 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import subprocess
-import sys
-import tempfile
-
-import generate_config_yml
-
-
-CHECKED_IN_FILE = "config.yml"
-REGENERATION_SCRIPT = "regenerate.sh"
-
-PARENT_DIR = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
-README_PATH = os.path.join(PARENT_DIR, "README.md")
-
-ERROR_MESSAGE_TEMPLATE = """
-The checked-in CircleCI "%s" file does not match what was generated by the scripts.
-Please re-run the "%s" script in the "%s" directory and commit the result. See "%s" for more information.
-"""
-
-
-def check_consistency():
-    _, temp_filename = tempfile.mkstemp("-generated-config.yml")
-
-    with open(temp_filename, "w") as fh:
-        generate_config_yml.stitch_sources(fh)
-
-    try:
-        subprocess.check_call(["cmp", temp_filename, CHECKED_IN_FILE])
-    except subprocess.CalledProcessError:
-        sys.exit(
-            ERROR_MESSAGE_TEMPLATE
-            % (CHECKED_IN_FILE, REGENERATION_SCRIPT, PARENT_DIR, README_PATH)
-        )
-    finally:
-        os.remove(temp_filename)
-
-
-if __name__ == "__main__":
-    check_consistency()
--- a/.circleci/generate_config_yml.py
+++ b/.circleci/generate_config_yml.py
@ -1,196 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-This script is the source of truth for config.yml.
-Please see README.md in this directory for details.
-"""
-
-import os
-import shutil
-import sys
-from collections import namedtuple
-
-import cimodel.data.simple.docker_definitions
-import cimodel.data.simple.mobile_definitions
-import cimodel.data.simple.nightly_ios
-import cimodel.lib.miniutils as miniutils
-import cimodel.lib.miniyaml as miniyaml
-
-
-class File:
-    """
-    Verbatim copy the contents of a file into config.yml
-    """
-
-    def __init__(self, filename):
-        self.filename = filename
-
-    def write(self, output_filehandle):
-        with open(os.path.join("verbatim-sources", self.filename)) as fh:
-            shutil.copyfileobj(fh, output_filehandle)
-
-
-class FunctionGen(namedtuple("FunctionGen", "function depth")):
-    __slots__ = ()
-
-
-class Treegen(FunctionGen):
-    """
-    Insert the content of a YAML tree into config.yml
-    """
-
-    def write(self, output_filehandle):
-        miniyaml.render(output_filehandle, self.function(), self.depth)
-
-
-class Listgen(FunctionGen):
-    """
-    Insert the content of a YAML list into config.yml
-    """
-
-    def write(self, output_filehandle):
-        miniyaml.render(output_filehandle, self.function(), self.depth)
-
-
-def horizontal_rule():
-    return "".join("#" * 78)
-
-
-class Header:
-    def __init__(self, title, summary=None):
-        self.title = title
-        self.summary_lines = summary or []
-
-    def write(self, output_filehandle):
-        text_lines = [self.title] + self.summary_lines
-        comment_lines = ["# " + x for x in text_lines]
-        lines = miniutils.sandwich([horizontal_rule()], comment_lines)
-
-        for line in filter(None, lines):
-            output_filehandle.write(line + "\n")
-
-
-def _for_all_items(items, functor) -> None:
-    if isinstance(items, list):
-        for item in items:
-            _for_all_items(item, functor)
-    if isinstance(items, dict) and len(items) == 1:
-        item_type, item = next(iter(items.items()))
-        functor(item_type, item)
-
-
-def filter_master_only_jobs(items):
-    def _is_main_or_master_item(item):
-        filters = item.get("filters", None)
-        branches = filters.get("branches", None) if filters is not None else None
-        branches_only = branches.get("only", None) if branches is not None else None
-        return (
-            ("main" in branches_only or "master" in branches_only)
-            if branches_only is not None
-            else False
-        )
-
-    master_deps = set()
-
-    def _save_requires_if_master(item_type, item):
-        requires = item.get("requires", None)
-        item_name = item.get("name", None)
-        if not isinstance(requires, list):
-            return
-        if _is_main_or_master_item(item) or item_name in master_deps:
-            master_deps.update([n.strip('"') for n in requires])
-
-    def _do_filtering(items):
-        if isinstance(items, list):
-            rc = [_do_filtering(item) for item in items]
-            return [item for item in rc if len(item if item is not None else []) > 0]
-        assert isinstance(items, dict) and len(items) == 1
-        item_type, item = next(iter(items.items()))
-        item_name = item.get("name", None)
-        item_name = item_name.strip('"') if item_name is not None else None
-        if not _is_main_or_master_item(item) and item_name not in master_deps:
-            return None
-        if "filters" in item:
-            item = item.copy()
-            item.pop("filters")
-        return {item_type: item}
-
-    # Scan of dependencies twice to pick up nested required jobs
-    # I.e. jobs depending on jobs that main-only job depend on
-    _for_all_items(items, _save_requires_if_master)
-    _for_all_items(items, _save_requires_if_master)
-    return _do_filtering(items)
-
-
-def generate_required_docker_images(items):
-    required_docker_images = set()
-
-    def _requires_docker_image(item_type, item):
-        requires = item.get("requires", None)
-        if not isinstance(requires, list):
-            return
-        for requirement in requires:
-            requirement = requirement.replace('"', "")
-            if requirement.startswith("docker-"):
-                required_docker_images.add(requirement)
-
-    _for_all_items(items, _requires_docker_image)
-    return required_docker_images
-
-
-def gen_build_workflows_tree():
-    build_workflows_functions = [
-        cimodel.data.simple.mobile_definitions.get_workflow_jobs,
-        cimodel.data.simple.nightly_ios.get_workflow_jobs,
-    ]
-    build_jobs = [f() for f in build_workflows_functions]
-    build_jobs.extend(
-        cimodel.data.simple.docker_definitions.get_workflow_jobs(
-            # sort for consistency
-            sorted(generate_required_docker_images(build_jobs))
-        )
-    )
-    master_build_jobs = filter_master_only_jobs(build_jobs)
-
-    rc = {
-        "workflows": {
-            "build": {
-                "when": r"<< pipeline.parameters.run_build >>",
-                "jobs": build_jobs,
-            },
-        }
-    }
-    if len(master_build_jobs) > 0:
-        rc["workflows"]["master_build"] = {
-            "when": r"<< pipeline.parameters.run_master_build >>",
-            "jobs": master_build_jobs,
-        }
-    return rc
-
-
-# Order of this list matters to the generated config.yml.
-YAML_SOURCES = [
-    File("header-section.yml"),
-    File("commands.yml"),
-    File("nightly-binary-build-defaults.yml"),
-    Header("Build parameters"),
-    File("build-parameters/pytorch-build-params.yml"),
-    File("build-parameters/binary-build-params.yml"),
-    Header("Job specs"),
-    File("job-specs/binary-job-specs.yml"),
-    File("job-specs/job-specs-custom.yml"),
-    File("job-specs/binary_update_htmls.yml"),
-    File("job-specs/binary-build-tests.yml"),
-    File("job-specs/docker_jobs.yml"),
-    Header("Workflows"),
-    Treegen(gen_build_workflows_tree, 0),
-]
-
-
-def stitch_sources(output_filehandle):
-    for f in YAML_SOURCES:
-        f.write(output_filehandle)
-
-
-if __name__ == "__main__":
-    stitch_sources(sys.stdout)
--- a/.circleci/regenerate.ps1
+++ b/.circleci/regenerate.ps1
@ -1,5 +0,0 @@
-cd $PSScriptRoot;
-$NewFile = New-TemporaryFile;
-python generate_config_yml.py > $NewFile.name
-(Get-Content $NewFile.name -Raw).TrimEnd().Replace("`r`n","`n") | Set-Content config.yml -Force
-Remove-Item $NewFile.name
--- a/.circleci/regenerate.sh
+++ b/.circleci/regenerate.sh
@ -1,17 +0,0 @@
-#!/bin/bash -e
-
-# Allows this script to be invoked from any directory:
-cd "$(dirname "$0")"
-
-UNCOMMIT_CHANGE=$(git status -s | grep " config.yml" | wc -l | xargs)
-if [[ $UNCOMMIT_CHANGE != 0 ]]; then
-    OLD_FILE=$(mktemp)
-    cp config.yml "$OLD_FILE"
-    echo "Uncommitted change detected in .circleci/config.yml"
-    echo "It has been backed up to $OLD_FILE"
-fi
-
-NEW_FILE=$(mktemp)
-./generate_config_yml.py > "$NEW_FILE"
-cp "$NEW_FILE" config.yml
-echo "New config generated in .circleci/config.yml"
--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@ -58,8 +58,7 @@ fi
 PIP_UPLOAD_FOLDER='nightly/'
 # We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
 export DATE="$(date -u +%Y%m%d)"
-#TODO: We should be pulling semver version from the base version.txt
-BASE_BUILD_VERSION="2.2.0.dev$DATE"
+BASE_BUILD_VERSION="$(cat ${PYTORCH_ROOT}/version.txt|cut -da -f1).dev${DATE}"
 # Change BASE_BUILD_VERSION to git tag when on a git tag
 # Use 'git -C' to make doubly sure we're in the correct directory for checking
 # the git tag
--- a/.circleci/verbatim-sources/build-parameters/binary-build-params.yml
+++ b/.circleci/verbatim-sources/build-parameters/binary-build-params.yml
@ -1,65 +0,0 @@
-binary_linux_build_params: &binary_linux_build_params
-  parameters:
-    build_environment:
-      type: string
-      default: ""
-    docker_image:
-      type: string
-      default: ""
-    libtorch_variant:
-      type: string
-      default: ""
-    resource_class:
-      type: string
-      default: "2xlarge+"
-  environment:
-    BUILD_ENVIRONMENT: << parameters.build_environment >>
-    LIBTORCH_VARIANT: << parameters.libtorch_variant >>
-    ANACONDA_USER: pytorch
-  resource_class: << parameters.resource_class >>
-  docker:
-    - image: << parameters.docker_image >>
-
-binary_linux_test_upload_params: &binary_linux_test_upload_params
-  parameters:
-    build_environment:
-      type: string
-      default: ""
-    docker_image:
-      type: string
-      default: ""
-    libtorch_variant:
-      type: string
-      default: ""
-    resource_class:
-      type: string
-      default: "medium"
-    use_cuda_docker_runtime:
-      type: string
-      default: ""
-  environment:
-    BUILD_ENVIRONMENT: << parameters.build_environment >>
-    DOCKER_IMAGE: << parameters.docker_image >>
-    USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
-    LIBTORCH_VARIANT: << parameters.libtorch_variant >>
-  resource_class: << parameters.resource_class >>
-
-binary_mac_params: &binary_mac_params
-  parameters:
-    build_environment:
-      type: string
-      default: ""
-  environment:
-    BUILD_ENVIRONMENT: << parameters.build_environment >>
-
-binary_windows_params: &binary_windows_params
-  parameters:
-    build_environment:
-      type: string
-      default: ""
-    executor:
-      type: string
-      default: "windows-xlarge-cpu-with-nvidia-cuda"
-  environment:
-    BUILD_ENVIRONMENT: << parameters.build_environment >>
-    JOB_EXECUTOR: <<parameters.executor>>
--- a/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
+++ b/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
@ -1,105 +0,0 @@
-pytorch_params: &pytorch_params
-  parameters:
-    build_environment:
-      type: string
-      default: ""
-    docker_image:
-      type: string
-      default: ""
-    resource_class:
-      type: string
-      default: "large"
-    use_cuda_docker_runtime:
-      type: string
-      default: ""
-    build_only:
-      type: string
-      default: ""
-    ci_master:
-      type: string
-      default: ""
-  environment:
-    BUILD_ENVIRONMENT: << parameters.build_environment >>
-    DOCKER_IMAGE: << parameters.docker_image >>
-    USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
-    BUILD_ONLY: << parameters.build_only >>
-    CI_MASTER: << pipeline.parameters.run_master_build >>
-  resource_class: << parameters.resource_class >>
-
-pytorch_ios_params: &pytorch_ios_params
-  parameters:
-    build_environment:
-      type: string
-      default: ""
-    ios_arch:
-      type: string
-      default: ""
-    ios_platform:
-      type: string
-      default: ""
-    op_list:
-      type: string
-      default: ""
-    use_metal:
-      type: string
-      default: "0"
-    lite_interpreter:
-      type: string
-      default: "1"
-    use_coreml:
-      type: string
-      default: "0"
-  environment:
-    BUILD_ENVIRONMENT: << parameters.build_environment >>
-    IOS_ARCH: << parameters.ios_arch >>
-    IOS_PLATFORM: << parameters.ios_platform >>
-    SELECTED_OP_LIST: << parameters.op_list >>
-    USE_PYTORCH_METAL: << parameters.use_metal >>
-    BUILD_LITE_INTERPRETER: << parameters.lite_interpreter >>
-    USE_COREML_DELEGATE: << parameters.use_coreml >>
-
-pytorch_windows_params: &pytorch_windows_params
-  parameters:
-    executor:
-      type: string
-      default: "windows-xlarge-cpu-with-nvidia-cuda"
-    build_environment:
-      type: string
-      default: ""
-    test_name:
-      type: string
-      default: ""
-    cuda_version:
-      type: string
-      default: "10.1"
-    python_version:
-      type: string
-      default: "3.8"
-    vs_version:
-      type: string
-      default: "16.8.6"
-    vc_version:
-      type: string
-      default: "14.16"
-    vc_year:
-      type: string
-      default: "2019"
-    vc_product:
-      type: string
-      default: "BuildTools"
-    use_cuda:
-      type: string
-      default: ""
-  environment:
-    BUILD_ENVIRONMENT: <<parameters.build_environment>>
-    SCCACHE_BUCKET: "ossci-compiler-cache"
-    CUDA_VERSION: <<parameters.cuda_version>>
-    PYTHON_VERSION: <<parameters.python_version>>
-    VS_VERSION: <<parameters.vs_version>>
-    VC_VERSION: <<parameters.vc_version>>
-    VC_YEAR: <<parameters.vc_year>>
-    VC_PRODUCT: <<parameters.vc_product>>
-    USE_CUDA: <<parameters.use_cuda>>
-    TORCH_CUDA_ARCH_LIST: "5.2 7.5"
-    JOB_BASE_NAME: <<parameters.test_name>>
-    JOB_EXECUTOR: <<parameters.executor>>
--- a/.circleci/verbatim-sources/commands.yml
+++ b/.circleci/verbatim-sources/commands.yml
@ -1,134 +0,0 @@
-commands:
-
-  calculate_docker_image_tag:
-    description: "Calculates the docker image tag"
-    steps:
-      - run:
-          name: "Calculate docker image hash"
-          command: |
-            DOCKER_TAG=$(git rev-parse HEAD:.ci/docker)
-            echo "DOCKER_TAG=${DOCKER_TAG}" >> "${BASH_ENV}"
-
-  designate_upload_channel:
-    description: "inserts the correct upload channel into ${BASH_ENV}"
-    steps:
-      - run:
-          name: adding UPLOAD_CHANNEL to BASH_ENV
-          command: |
-            our_upload_channel=nightly
-            # Builds with CIRCLE_TAG set upload to the test channel instead of nightly
-            if [[ -n "${CIRCLE_TAG}" ]]; then
-              our_upload_channel=test
-            fi
-            echo "export UPLOAD_CHANNEL=${our_upload_channel}" >> "${BASH_ENV}"
-
-  # This system setup script is meant to run before the CI-related scripts, e.g.,
-  # installing Git client, checking out code, setting up CI env, and
-  # building/testing.
-  setup_linux_system_environment:
-    steps:
-      - run:
-          name: Set Up System Environment
-          no_output_timeout: "1h"
-          command: .circleci/scripts/setup_linux_system_environment.sh
-
-  setup_ci_environment:
-    steps:
-      - run:
-          name: Set Up CI Environment After attach_workspace
-          no_output_timeout: "1h"
-          command: .circleci/scripts/setup_ci_environment.sh
-
-  brew_update:
-    description: "Update Homebrew and install base formulae"
-    steps:
-      - run:
-          name: Update Homebrew
-          no_output_timeout: "10m"
-          command: |
-            set -ex
-
-            # Update repositories manually.
-            # Running `brew update` produces a comparison between the
-            # current checkout and the updated checkout, which takes a
-            # very long time because the existing checkout is 2y old.
-            #
-            # `find -print0` with `read -d ''` keeps every path intact, and
-            # `git -C` updates each checkout without changing this shell's
-            # working directory.
-            while IFS= read -r -d '' git_dir; do
-              repo_dir="${git_dir%/.git}"
-              git -C "${repo_dir}" fetch --depth=1 origin
-              git -C "${repo_dir}" reset --hard origin/master
-            done < <(find /usr/local/Homebrew -type d -name .git -print0)
-
-            export HOMEBREW_NO_AUTO_UPDATE=1
-
-            # Install expect and moreutils so that we can call `unbuffer` and `ts`.
-            # moreutils installs a `parallel` executable by default, which conflicts
-            # with the executable from the GNU `parallel`, so we must unlink GNU
-            # `parallel` first, and relink it afterwards.
-            brew unlink parallel
-            brew install moreutils
-            brew link parallel --overwrite
-            brew install expect
-
-  brew_install:
-    description: "Install Homebrew formulae"
-    parameters:
-      formulae:
-        type: string
-        default: ""
-    steps:
-      - run:
-          name: Install << parameters.formulae >>
-          no_output_timeout: "10m"
-          command: |
-            set -ex
-            export HOMEBREW_NO_AUTO_UPDATE=1
-            brew install << parameters.formulae >>
-
-  run_brew_for_macos_build:
-    steps:
-      - brew_update
-      - brew_install:
-          formulae: libomp
-
-  run_brew_for_ios_build:
-    steps:
-      - brew_update
-      - brew_install:
-          formulae: libtool
-
-  optional_merge_target_branch:
-    steps:
-      - run:
-          name: (Optional) Merge target branch
-          no_output_timeout: "10m"
-          command: |
-            # For pull requests (except on the nightly branch) whose BUILD_ENVIRONMENT
-            # mentions xla or gcc5, merge the PR's base branch into the commit under test.
-            if [[ -n "$CIRCLE_PULL_REQUEST" && "$CIRCLE_BRANCH" != "nightly" ]]; then
-              PR_NUM=$(basename "$CIRCLE_PULL_REQUEST")
-              CIRCLE_PR_BASE_BRANCH=$(curl -s "https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/$PR_NUM" | jq -r '.base.ref')
-              if [[ "${BUILD_ENVIRONMENT}" == *"xla"* || "${BUILD_ENVIRONMENT}" == *"gcc5"* ]] ; then
-                set -x
-                git config --global user.email "circleci.ossci@gmail.com"
-                git config --global user.name "CircleCI"
-                git config remote.origin.url https://github.com/pytorch/pytorch.git
-                git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
-                git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
-                # PRs generated by ghstack have a base branch of the form gh/xxx/1234/base
-                if [[ "${CIRCLE_PR_BASE_BRANCH}" == "gh/"* ]]; then
-                  CIRCLE_PR_BASE_BRANCH=master
-                fi
-                # Assign first, then export: `export VAR=$(cmd)` would hide a failing `git log`.
-                GIT_MERGE_TARGET=$(git log -n 1 --pretty=format:"%H" "origin/$CIRCLE_PR_BASE_BRANCH")
-                export GIT_MERGE_TARGET
-                echo "GIT_MERGE_TARGET: " "${GIT_MERGE_TARGET}"
-                export GIT_COMMIT="${CIRCLE_SHA1}"
-                echo "GIT_COMMIT: " "${GIT_COMMIT}"
-                git checkout -f "${GIT_COMMIT}"
-                git reset --hard "${GIT_COMMIT}"
-                git merge --allow-unrelated-histories --no-edit --no-ff "${GIT_MERGE_TARGET}"
-                echo "Merged $CIRCLE_PR_BASE_BRANCH branch before building in environment $BUILD_ENVIRONMENT"
-                set +x
-              else
-                echo "No need to merge with $CIRCLE_PR_BASE_BRANCH, skipping..."
-              fi
-            else
-              echo "This is not a pull request, skipping..."
-            fi
--- a/.circleci/verbatim-sources/header-section.yml
+++ b/.circleci/verbatim-sources/header-section.yml
@ -1,41 +0,0 @@
-# WARNING: DO NOT EDIT THIS FILE DIRECTLY!!!
-# See the README.md in this directory.
-
-# IMPORTANT: To update Docker image version, please follow
-# the instructions at
-# https://github.com/pytorch/pytorch/wiki/Docker-image-build-on-CircleCI
-
-version: 2.1
-
-parameters:
-  run_binary_tests:
-    type: boolean
-    default: false
-  run_build:
-    type: boolean
-    default: true
-  run_master_build:
-    type: boolean
-    default: false
-  run_slow_gradcheck_build:
-    type: boolean
-    default: false
-
-executors:
-  windows-with-nvidia-gpu:
-    machine:
-      resource_class: windows.gpu.nvidia.medium
-      image: windows-server-2019-nvidia:previous
-      shell: bash.exe
-
-  windows-xlarge-cpu-with-nvidia-cuda:
-    machine:
-      resource_class: windows.xlarge
-      image: windows-server-2019-vs2019:stable
-      shell: bash.exe
-
-  windows-medium-cpu-with-nvidia-cuda:
-    machine:
-      resource_class: windows.medium
-      image: windows-server-2019-vs2019:stable
-      shell: bash.exe
--- a/.circleci/verbatim-sources/job-specs/binary-build-tests.yml
+++ b/.circleci/verbatim-sources/job-specs/binary-build-tests.yml
@ -1,14 +0,0 @@
-
-# There is currently no testing for libtorch TODO
-#  binary_linux_libtorch_3.6m_cpu_test:
-#    environment:
-#      BUILD_ENVIRONMENT: "libtorch 3.6m cpu"
-#    resource_class: gpu.nvidia.small
-#    <<: *binary_linux_test
-#
-#  binary_linux_libtorch_3.6m_cu90_test:
-#    environment:
-#      BUILD_ENVIRONMENT: "libtorch 3.6m cu90"
-#    resource_class: gpu.nvidia.small
-#    <<: *binary_linux_test
-#
--- a/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
@ -1,44 +0,0 @@
-jobs:
-  binary_ios_build:
-    <<: *pytorch_ios_params
-    macos:
-      xcode: "12.5.1"
-    steps:
-    - attach_workspace:
-        at: ~/workspace
-    - checkout
-    - run_brew_for_ios_build
-    - run:
-        name: Build
-        no_output_timeout: "1h"
-        command: |
-          script="/Users/distiller/project/.circleci/scripts/binary_ios_build.sh"
-          cat "$script"
-          source "$script"
-    - run:
-        name: Test
-        no_output_timeout: "30m"
-        command: |
-          script="/Users/distiller/project/.circleci/scripts/binary_ios_test.sh"
-          cat "$script"
-          source "$script"
-    - persist_to_workspace:
-        root: /Users/distiller/workspace/
-        paths: ios
-
-  binary_ios_upload:
-    <<: *pytorch_ios_params
-    macos:
-      xcode: "12.5.1"
-    steps:
-    - attach_workspace:
-        at: ~/workspace
-    - checkout
-    - run_brew_for_ios_build
-    - run:
-        name: Upload
-        no_output_timeout: "1h"
-        command: |
-          script="/Users/distiller/project/.circleci/scripts/binary_ios_upload.sh"
-          cat "$script"
-          source "$script"
--- a/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml
+++ b/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml
@ -1,53 +0,0 @@
-
-  # update_s3_htmls job
-  # These jobs create html files for every cpu/cu## folder in s3. The html
-  # files just store the names of all the files in that folder (which are
-  # binary files (.whl files)). This is to allow pip installs of the latest
-  # version in a folder without having to know the latest date. Pip has a flag
-  # -f that you can pass an html file listing a bunch of packages, and pip will
-  # then install the one with the most recent version.
-  update_s3_htmls: &update_s3_htmls
-    machine:
-      image: ubuntu-2004:202104-01
-    resource_class: medium
-    steps:
-    - checkout
-    - setup_linux_system_environment
-    - run:
-        <<: *binary_checkout
-    # N.B. we do not run binary_populate_env. The only variable we need is
-    # PIP_UPLOAD_FOLDER (which is 'nightly/' for the nightlies and '' for
-    # releases, and sometimes other things for special cases). Instead we
-    # expect PIP_UPLOAD_FOLDER to be passed directly in the env. This is
-    # because, unlike all the other binary jobs, these jobs only get run once,
-    # in a separate workflow. They are not a step in other binary jobs like
-    # build, test, upload.
-    #
-    # You could attach this to every job, or include it in the upload step if
-    # you wanted. You would need to add binary_populate_env in this case to
-    # make sure it has the same upload folder as the job it's attached to. This
-    # function is idempotent, so it won't hurt anything; it's just a little
-    # unnecessary.
-    - run:
-        name: define PIP_UPLOAD_FOLDER
-        command: |
-          our_upload_folder=nightly/
-          # On tags upload to test instead
-          if [[ -n "${CIRCLE_TAG}" ]]; then
-            our_upload_folder=test/
-          fi
-          echo "export PIP_UPLOAD_FOLDER=${our_upload_folder}" >> ${BASH_ENV}
-    - run:
-        name: Update s3 htmls
-        no_output_timeout: "1h"
-        command: |
-          # Keep xtrace off while the AWS credentials are written to and read from the env file.
-          set +x
-          echo "declare -x \"AWS_ACCESS_KEY_ID=${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}\"" >> /home/circleci/project/env
-          echo "declare -x \"AWS_SECRET_ACCESS_KEY=${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}\"" >> /home/circleci/project/env
-          source /home/circleci/project/env
-          set -eux -o pipefail
-          # Run the given command, retrying up to four more times with growing delays (1s, 2s, 4s, 8s).
-          # "$@" passes every argument through as its own word; unquoted $* would re-split and glob them.
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
-          }
-          retry pip install awscli==1.6
-          "/home/circleci/project/builder/cron/update_s3_htmls.sh"
--- a/.circleci/verbatim-sources/job-specs/docker_jobs.yml
+++ b/.circleci/verbatim-sources/job-specs/docker_jobs.yml
@ -1,56 +0,0 @@
-  docker_build_job:
-      parameters:
-        image_name:
-          type: string
-          default: ""
-      machine:
-        image: ubuntu-2004:202104-01
-      resource_class: large
-      environment:
-        IMAGE_NAME: << parameters.image_name >>
-        # Enable 'docker manifest'
-        DOCKER_CLI_EXPERIMENTAL: "enabled"
-        DOCKER_BUILDKIT: 1
-      steps:
-        - checkout
-        - calculate_docker_image_tag
-        - run:
-            name: Check if image should be built
-            command: |
-              set +x
-              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
-              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
-              export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-              export AWS_REGION=us-east-1
-              aws ecr get-login-password --region $AWS_REGION|docker login --username AWS \
-                       --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
-              set -x
-              # Check if image already exists, if it does then skip building it
-              if docker manifest inspect "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${IMAGE_NAME}:${DOCKER_TAG}"; then
-                circleci-agent step halt
-                # circleci-agent step halt doesn't actually halt the step so we need to
-                # explicitly exit the step here ourselves before it causes too much trouble
-                exit 0
-              fi
-              # Covers the case where a previous tag doesn't exist for the tree
-              # this is only really applicable on trees that don't have `.ci/docker` at its merge base, i.e. nightly
-              if ! git rev-parse "$(git merge-base HEAD << pipeline.git.base_revision >>):.ci/docker"; then
-                echo "Directory '.ci/docker' not found in tree << pipeline.git.base_revision >>, you should probably rebase onto a more recent commit"
-                exit 1
-              fi
-              # Resolve the same `.ci/docker` path that was validated above (note the leading dot).
-              PREVIOUS_DOCKER_TAG=$(git rev-parse "$(git merge-base HEAD << pipeline.git.base_revision >>):.ci/docker")
-              # If no image exists but the hash is the same as the previous hash then we should error out here
-              if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
-                echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
-                echo "       contact the PyTorch team to restore the original images"
-                exit 1
-              fi
-        - run:
-            name: build_docker_image_<< parameters.image_name >>
-            no_output_timeout: "1h"
-            command: |
-              set +x
-              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
-              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
-              set -x
-              cd .ci/docker && ./build_docker.sh
--- a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
@ -1,747 +0,0 @@
-  pytorch_doc_push:
-    resource_class: medium
-    machine:
-      image: ubuntu-2004:202104-01
-    parameters:
-      branch:
-        type: string
-        default: "main"
-    steps:
-    - attach_workspace:
-        at: /tmp/workspace
-    - run:
-        name: Generate netrc
-        command: |
-          # set credentials for https pushing
-          cat > ~/.netrc \<<DONE
-            machine github.com
-            login pytorchbot
-            password ${GITHUB_PYTORCHBOT_TOKEN}
-          DONE
-    - run:
-        name: Docs push
-        command: |
-          pushd /tmp/workspace
-          git push -u origin "<< parameters.branch >>"
-
-  pytorch_macos_10_15_py3_build:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-macos-10.15-py3-arm64-build
-    macos:
-      xcode: "12.3.0"
-    steps:
-      - checkout
-      - run_brew_for_macos_build
-      - run:
-          name: Build
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            export CROSS_COMPILE_ARM64=1
-            export JOB_BASE_NAME=$CIRCLE_JOB
-
-            # Install sccache
-            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-            sudo chmod +x /usr/local/bin/sccache
-            export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
-
-            # This IAM user allows write access to S3 bucket for sccache
-            set +x
-            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            set -x
-
-            chmod a+x .ci/pytorch/macos-build.sh
-            unbuffer .ci/pytorch/macos-build.sh 2>&1 | ts
-
-      - persist_to_workspace:
-          root: /Users/distiller/workspace/
-          paths:
-            - miniconda3
-      - store_artifacts:
-          path: /Users/distiller/project/dist
-
-  pytorch_macos_10_13_py3_build:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-build
-    macos:
-      xcode: "12.0"
-    steps:
-      - checkout
-      - run_brew_for_macos_build
-      - run:
-          name: Build
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            export JOB_BASE_NAME=$CIRCLE_JOB
-
-            # Install sccache
-            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-            sudo chmod +x /usr/local/bin/sccache
-            export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
-
-            # This IAM user allows write access to S3 bucket for sccache
-            set +x
-            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            set -x
-
-            chmod a+x .ci/pytorch/macos-build.sh
-            unbuffer .ci/pytorch/macos-build.sh 2>&1 | ts
-
-      - persist_to_workspace:
-          root: /Users/distiller/workspace/
-          paths:
-            - miniconda3
-
-  mac_build:
-    parameters:
-      build-environment:
-        type: string
-        description: Top-level label for what's being built/tested.
-      xcode-version:
-        type: string
-        default: "13.3.1"
-        description: What xcode version to build with.
-      build-generates-artifacts:
-        type: boolean
-        default: true
-        description: if the build generates build artifacts
-      python-version:
-        type: string
-        default: "3.8"
-    macos:
-      xcode: << parameters.xcode-version >>
-    resource_class: medium
-    environment:
-      BUILD_ENVIRONMENT: << parameters.build-environment >>
-      AWS_REGION: us-east-1
-    steps:
-
-      - checkout
-      - run_brew_for_macos_build
-
-      - run:
-          name: Install sccache
-          command: |
-            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-            sudo chmod +x /usr/local/bin/sccache
-            echo "export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${BASH_ENV}"
-            echo "export SCCACHE_S3_KEY_PREFIX=${GITHUB_WORKFLOW}" >> "${BASH_ENV}"
-
-            set +x
-            echo "export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}" >> "${BASH_ENV}"
-            echo "export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}" >> "${BASH_ENV}"
-            set -x
-
-      - run:
-          name: Get workflow job id
-          command: |
-            echo "export OUR_GITHUB_JOB_ID=${CIRCLE_WORKFLOW_JOB_ID}" >> "${BASH_ENV}"
-
-      - run:
-          name: Build
-          command: |
-            set -x
-
-            git submodule sync
-            git submodule update --init --recursive --depth 1 --jobs 0
-
-            export PATH="/usr/local/bin:$PATH"
-            export WORKSPACE_DIR="${HOME}/workspace"
-            mkdir -p "${WORKSPACE_DIR}"
-            # Use the py38 Miniconda installer unless python-version is exactly 3.9.12.
-            # The templated value is quoted so an empty or spaced parameter cannot break the test.
-            MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py38_4.12.0-MacOSX-x86_64.sh"
-            if [[ "<< parameters.python-version >>" == "3.9.12" ]]; then
-              MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-x86_64.sh"
-            fi
-
-            # If a local installation of conda doesn't exist, we download and install conda
-            if [ ! -d "${WORKSPACE_DIR}/miniconda3" ]; then
-              mkdir -p "${WORKSPACE_DIR}"
-              curl --retry 3 "${MINICONDA_URL}" -o "${WORKSPACE_DIR}"/miniconda3.sh
-              bash "${WORKSPACE_DIR}"/miniconda3.sh -b -p "${WORKSPACE_DIR}"/miniconda3
-            fi
-            export PATH="${WORKSPACE_DIR}/miniconda3/bin:$PATH"
-            # shellcheck disable=SC1091
-            source "${WORKSPACE_DIR}"/miniconda3/bin/activate
-
-            brew link --force libomp
-
-            echo "export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}" >> "${BASH_ENV}"
-            .ci/pytorch/macos-build.sh
-
-      - when:
-          condition: << parameters.build-generates-artifacts >>
-          steps:
-            - run:
-                name: Archive artifacts into zip
-                command: |
-                  zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files
-                  cp artifacts.zip /Users/distiller/workspace
-
-      - persist_to_workspace:
-          root: /Users/distiller/workspace/
-          paths:
-            - miniconda3
-            - artifacts.zip
-
-      - store_artifacts:
-          path: /Users/distiller/project/artifacts.zip
-
-  mac_test:
-    parameters:
-      build-environment:
-        type: string
-      shard-number:
-        type: string
-      num-test-shards:
-        type: string
-      xcode-version:
-        type: string
-      test-config:
-        type: string
-        default: 'default'
-
-    macos:
-      xcode: << parameters.xcode-version >>
-    environment:
-      GIT_DEFAULT_BRANCH: 'master'
-      BUILD_ENVIRONMENT: << parameters.build-environment >>
-      TEST_CONFIG: << parameters.test-config >>
-      SHARD_NUMBER: << parameters.shard-number >>
-      NUM_TEST_SHARDS: << parameters.num-test-shards >>
-      PYTORCH_RETRY_TEST_CASES: 1
-      PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
-    steps:
-      - checkout
-      - attach_workspace:
-          at: ~/workspace
-      - run_brew_for_macos_build
-      - run:
-          name: Test
-          no_output_timeout: "2h"
-          command: |
-            set -x
-
-            git submodule sync --recursive
-            git submodule update --init --recursive
-
-            mv ~/workspace/artifacts.zip .
-            unzip artifacts.zip
-
-            export IN_CI=1
-
-            COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")
-
-            export PATH="/usr/local/bin:$PATH"
-            export WORKSPACE_DIR="${HOME}/workspace"
-            mkdir -p "${WORKSPACE_DIR}"
-
-            export PATH="${WORKSPACE_DIR}/miniconda3/bin:$PATH"
-            source "${WORKSPACE_DIR}"/miniconda3/bin/activate
-
-            # sanitize the input commit message and PR body here:
-
-            # trim all new lines from commit messages to avoid issues with batch environment
-            # variable copying. see https://github.com/pytorch/pytorch/pull/80043#issuecomment-1167796028
-            COMMIT_MESSAGES="${COMMIT_MESSAGES//[$'\n\r']}"
-
-            # then trim all special characters like single and double quotes to avoid unescaped inputs to
-            # wreak havoc internally
-            export COMMIT_MESSAGES="${COMMIT_MESSAGES//[\'\"]}"
-
-            python3 -mpip install dist/*.whl
-            .ci/pytorch/macos-test.sh
-      - run:
-          name: Copy files for uploading test stats
-          command: |
-            # copy into a parent folder test-reports because we can't use CIRCLE_BUILD_NUM in path when persisting to workspace
-            mkdir -p test-reports/test-reports_${CIRCLE_BUILD_NUM}/test/test-reports
-            cp -r test/test-reports test-reports/test-reports_${CIRCLE_BUILD_NUM}/test/test-reports
-      - store_test_results:
-          path: test/test-reports
-      - persist_to_workspace:
-          root: /Users/distiller/project/
-          paths:
-            - test-reports
-
-  upload_test_stats:
-    machine: # executor type
-      image: ubuntu-2004:202010-01 # recommended linux image - includes Ubuntu 20.04, docker 19.03.13, docker-compose 1.27.4
-    steps:
-      - checkout
-      - attach_workspace:
-          at: ~/workspace
-      - run:
-          name: upload
-          command: |
-            set -e
-            # xtrace stays off until the credentials have been checked and exported,
-            # so their values are never echoed into the build log.
-            if [[ -z "${AWS_ACCESS_KEY_FOR_OSSCI_ARTIFACT_UPLOAD:-}" ]]; then
-              echo "No credentials found, cannot upload test stats (are you on a fork?)"
-              exit 0
-            fi
-            export AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_FOR_OSSCI_ARTIFACT_UPLOAD}"
-            export AWS_SECRET_ACCESS_KEY="${AWS_SECRET_KEY_FOR_OSSCI_ARTIFACT_UPLOAD}"
-            set -x
-            cp -r ~/workspace/test-reports/* ~/project
-            pip3 install requests==2.26 rockset==1.0.3 boto3==1.19.12
-            # No known way to get the run attempt number for reruns here, so default to 1
-            python3 -m tools.stats.upload_test_stats --workflow-run-id "${CIRCLE_WORKFLOW_JOB_ID}" --workflow-run-attempt 1 --head-branch << pipeline.git.branch >> --circleci
-  pytorch_macos_10_13_py3_test:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-test
-    macos:
-      xcode: "12.0"
-    steps:
-      - checkout
-      - attach_workspace:
-          at: ~/workspace
-      - run_brew_for_macos_build
-      - run:
-          name: Test
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            export JOB_BASE_NAME=$CIRCLE_JOB
-
-            chmod a+x .ci/pytorch/macos-test.sh
-            unbuffer .ci/pytorch/macos-test.sh 2>&1 | ts
-      - store_test_results:
-          path: test/test-reports
-
-  pytorch_macos_10_13_py3_lite_interpreter_build_test:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-test
-    macos:
-      xcode: "12.0"
-    steps:
-      - checkout
-      - attach_workspace:
-          at: ~/workspace
-      - run_brew_for_macos_build
-      - run:
-          name: Test
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            export BUILD_LITE_INTERPRETER=1
-            export JOB_BASE_NAME=$CIRCLE_JOB
-            chmod a+x ${HOME}/project/.ci/pytorch/macos-lite-interpreter-build-test.sh
-            unbuffer ${HOME}/project/.ci/pytorch/macos-lite-interpreter-build-test.sh 2>&1 | ts
-      - store_test_results:
-          path: test/test-reports
-
-  pytorch_android_gradle_build:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
-    resource_class: large
-    machine:
-      image: ubuntu-2004:202104-01
-    steps:
-    - checkout
-    - calculate_docker_image_tag
-    - setup_linux_system_environment
-    - setup_ci_environment
-    - run:
-        name: pytorch android gradle build
-        no_output_timeout: "1h"
-        command: |
-          set -eux
-          docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
-
-          docker_image_libtorch_android_x86_32=${docker_image_commit}-android-x86_32
-          docker_image_libtorch_android_x86_64=${docker_image_commit}-android-x86_64
-          docker_image_libtorch_android_arm_v7a=${docker_image_commit}-android-arm-v7a
-          docker_image_libtorch_android_arm_v8a=${docker_image_commit}-android-arm-v8a
-
-          echo "docker_image_commit: "${docker_image_commit}
-          echo "docker_image_libtorch_android_x86_32: "${docker_image_libtorch_android_x86_32}
-          echo "docker_image_libtorch_android_x86_64: "${docker_image_libtorch_android_x86_64}
-          echo "docker_image_libtorch_android_arm_v7a: "${docker_image_libtorch_android_arm_v7a}
-          echo "docker_image_libtorch_android_arm_v8a: "${docker_image_libtorch_android_arm_v8a}
-
-          # x86_32
-          time docker pull ${docker_image_libtorch_android_x86_32} >/dev/null
-          export id_x86_32=$(docker run --env-file "${BASH_ENV}" -e GRADLE_OFFLINE=1 --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_32})
-
-          export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_x86_32" bash) 2>&1'
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          # arm-v7a
-          time docker pull ${docker_image_libtorch_android_arm_v7a} >/dev/null
-          export id_arm_v7a=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_arm_v7a})
-
-          export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_arm_v7a" bash) 2>&1'
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          mkdir -p ~/workspace/build_android_install_arm_v7a
-          docker cp $id_arm_v7a:/var/lib/jenkins/workspace/build_android/install ~/workspace/build_android_install_arm_v7a
-
-          # x86_64
-          time docker pull ${docker_image_libtorch_android_x86_64} >/dev/null
-          export id_x86_64=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_64})
-
-          export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_x86_64" bash) 2>&1'
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          mkdir -p ~/workspace/build_android_install_x86_64
-          docker cp $id_x86_64:/var/lib/jenkins/workspace/build_android/install ~/workspace/build_android_install_x86_64
-
-          # arm-v8a
-          time docker pull ${docker_image_libtorch_android_arm_v8a} >/dev/null
-          export id_arm_v8a=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_arm_v8a})
-
-          export COMMAND='((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_arm_v8a" bash) 2>&1'
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          mkdir -p ~/workspace/build_android_install_arm_v8a
-          docker cp $id_arm_v8a:/var/lib/jenkins/workspace/build_android/install ~/workspace/build_android_install_arm_v8a
-
-          docker cp ~/workspace/build_android_install_arm_v7a $id_x86_32:/var/lib/jenkins/workspace/build_android_install_arm_v7a
-          docker cp ~/workspace/build_android_install_x86_64 $id_x86_32:/var/lib/jenkins/workspace/build_android_install_x86_64
-          docker cp ~/workspace/build_android_install_arm_v8a $id_x86_32:/var/lib/jenkins/workspace/build_android_install_arm_v8a
-
-          # run gradle buildRelease
-          export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id_x86_32" bash) 2>&1'
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          mkdir -p ~/workspace/build_android_artifacts
-          docker cp $id_x86_32:/var/lib/jenkins/workspace/android/artifacts.tgz ~/workspace/build_android_artifacts/
-
-          output_image=$docker_image_libtorch_android_x86_32-gradle
-          docker commit "$id_x86_32" ${output_image}
-          time docker push ${output_image}
-    - store_artifacts:
-        path: ~/workspace/build_android_artifacts/artifacts.tgz
-        destination: artifacts.tgz
-
-  pytorch_android_publish_snapshot:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-publish-snapshot
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
-    resource_class: large
-    machine:
-      image: ubuntu-2004:202104-01
-    steps:
-    - checkout
-    - calculate_docker_image_tag
-    - setup_linux_system_environment
-    - setup_ci_environment
-    - run:
-        name: pytorch android gradle build
-        no_output_timeout: "1h"
-        command: |
-          set -eux
-          docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
-
-          docker_image_libtorch_android_x86_32_gradle=${docker_image_commit}-android-x86_32-gradle
-
-          echo "docker_image_commit: "${docker_image_commit}
-          echo "docker_image_libtorch_android_x86_32_gradle: "${docker_image_libtorch_android_x86_32_gradle}
-
-          # x86_32
-          time docker pull ${docker_image_libtorch_android_x86_32_gradle} >/dev/null
-          export id_x86_32=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_32_gradle})
-
-          export COMMAND='((echo "sudo chown -R jenkins workspace" && echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export SONATYPE_NEXUS_USERNAME=${SONATYPE_NEXUS_USERNAME}" && echo "export SONATYPE_NEXUS_PASSWORD=${SONATYPE_NEXUS_PASSWORD}" && echo "export ANDROID_SIGN_KEY=${ANDROID_SIGN_KEY}" && echo "export ANDROID_SIGN_PASS=${ANDROID_SIGN_PASS}" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/publish_android_snapshot.sh") | docker exec -u jenkins -i "$id_x86_32" bash) 2>&1'
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          output_image=${docker_image_libtorch_android_x86_32_gradle}-publish-snapshot
-          docker commit "$id_x86_32" ${output_image}
-          time docker push ${output_image}
-
-  pytorch_android_gradle_build-x86_32:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-only-x86_32
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
-    resource_class: large
-    machine:
-      image: ubuntu-2004:202104-01
-    steps:
-    - checkout
-    - calculate_docker_image_tag
-    - setup_linux_system_environment
-    - checkout
-    - setup_ci_environment
-    - run:
-        name: pytorch android gradle build only x86_32 (for PR)
-        no_output_timeout: "1h"
-        command: |
-          set -e
-          docker_image_libtorch_android_x86_32=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}-android-x86_32
-          echo "docker_image_libtorch_android_x86_32: "${docker_image_libtorch_android_x86_32}
-
-          # x86
-          time docker pull ${docker_image_libtorch_android_x86_32} >/dev/null
-          export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${docker_image_libtorch_android_x86_32})
-
-          export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "export GRADLE_OFFLINE=1" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          mkdir -p ~/workspace/build_android_x86_32_artifacts
-          docker cp $id:/var/lib/jenkins/workspace/android/artifacts.tgz ~/workspace/build_android_x86_32_artifacts/
-
-          output_image=${docker_image_libtorch_android_x86_32}-gradle
-          docker commit "$id" ${output_image}
-          time docker push ${output_image}
-    - store_artifacts:
-        path: ~/workspace/build_android_x86_32_artifacts/artifacts.tgz
-        destination: artifacts.tgz
-
-  pytorch_ios_build:
-    <<: *pytorch_ios_params
-    macos:
-      xcode: "12.5.1"
-    steps:
-      - run:
-          name: checkout with retry
-          command: |
-            checkout() {
-              set -ex
-              # Workaround old docker images with incorrect $HOME
-              # check https://github.com/docker/docker/issues/2968 for details
-              if [ "${HOME}" = "/" ]
-                then
-                export HOME=$(getent passwd $(id -un) | cut -d: -f6)
-              fi
-
-              mkdir -p ~/.ssh
-
-              echo 'github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==
-              ' >> ~/.ssh/known_hosts
-
-              # use git+ssh instead of https
-              git config --global url."ssh://git@github.com".insteadOf "https://github.com" || true
-              git config --global gc.auto 0 || true
-
-              echo 'Cloning git repository'
-              mkdir -p '/Users/distiller/project'
-              cd '/Users/distiller/project'
-              git clone "$CIRCLE_REPOSITORY_URL" .
-              echo 'Checking out branch'
-              git checkout --force -B "$CIRCLE_BRANCH" "$CIRCLE_SHA1"
-              git --no-pager log --no-color -n 1 --format='HEAD is now at %h %s'
-            }
-
-            retry () {
-              $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-            }
-            retry checkout
-      - run_brew_for_ios_build
-      - run:
-          name: Setup Fastlane
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            PROJ_ROOT=/Users/distiller/project
-            cd ${PROJ_ROOT}/ios/TestApp
-            # install fastlane
-            sudo gem install bundler && bundle install
-      - run:
-          name: Build
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            WORKSPACE=/Users/distiller/workspace
-            PROJ_ROOT=/Users/distiller/project
-            export TCLLIBPATH="/usr/local/lib"
-
-            # Install conda
-            curl --retry 3 -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-x86_64.sh
-            chmod +x ~/conda.sh
-            /bin/bash ~/conda.sh -b -p ~/anaconda
-            export PATH="~/anaconda/bin:${PATH}"
-            source ~/anaconda/bin/activate
-
-            # Install dependencies
-            retry () {
-                $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-            }
-
-            retry conda install numpy ninja pyyaml mkl mkl-include setuptools cmake requests typing-extensions --yes
-
-            # sync submodules
-            cd ${PROJ_ROOT}
-            git submodule sync
-            git submodule update --init --recursive --depth 1 --jobs 0
-
-            # export
-            export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-
-            # run build script
-            chmod a+x ${PROJ_ROOT}/scripts/build_ios.sh
-            echo "IOS_ARCH: ${IOS_ARCH}"
-            echo "IOS_PLATFORM: ${IOS_PLATFORM}"
-            echo "USE_PYTORCH_METAL": "${USE_METAL}"
-            echo "BUILD_LITE_INTERPRETER": "${BUILD_LITE_INTERPRETER}"
-            echo "USE_COREML_DELEGATE": "${USE_COREML_DELEGATE}"
-
-            #check the custom build flag
-            echo "SELECTED_OP_LIST: ${SELECTED_OP_LIST}"
-            if [ -n "${SELECTED_OP_LIST}" ]; then
-                export SELECTED_OP_LIST="${PROJ_ROOT}/ios/TestApp/custom_build/${SELECTED_OP_LIST}"
-            fi
-            export IOS_ARCH=${IOS_ARCH}
-            export IOS_PLATFORM=${IOS_PLATFORM}
-            export USE_COREML_DELEGATE=${USE_COREML_DELEGATE}
-            if [ ${IOS_PLATFORM} != "SIMULATOR" ]; then
-              export USE_PYTORCH_METAL=${USE_METAL}
-            fi
-            unbuffer ${PROJ_ROOT}/scripts/build_ios.sh 2>&1 | ts
-      - run:
-          name: Run Build Test
-          no_output_timeout: "30m"
-          command: |
-            set -e
-            PROJ_ROOT=/Users/distiller/project
-            # run the ruby build script
-            if ! [ -x "$(command -v xcodebuild)" ]; then
-              echo 'Error: xcodebuild is not installed.'
-              exit 1
-            fi
-            ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM}
-            if ! [ "$?" -eq "0" ]; then
-              echo 'xcodebuild failed!'
-              exit 1
-            fi
-      - run:
-          name: Run Simulator Tests
-          no_output_timeout: "2h"
-          command: |
-            set -e
-            if [ ${IOS_PLATFORM} != "SIMULATOR" ]; then
-              echo "not SIMULATOR build, skip it."
-              exit 0
-            fi
-            WORKSPACE=/Users/distiller/workspace
-            PROJ_ROOT=/Users/distiller/project
-            source ~/anaconda/bin/activate
-            # use the pytorch nightly build to generate models
-            pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-            # generate models for differnet backends
-            cd ${PROJ_ROOT}/ios/TestApp/benchmark
-            mkdir -p ../models
-            if [ ${USE_COREML_DELEGATE} == 1 ]; then
-              pip install coremltools==5.0b5 protobuf==3.20.1
-              python coreml_backend.py
-            else
-              cd "${PROJ_ROOT}"
-              python test/mobile/model_test/gen_test_model.py ios-test
-            fi
-            cd "${PROJ_ROOT}/ios/TestApp/benchmark"
-            if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
-              echo "Setting up the TestApp for LiteInterpreter"
-              ruby setup.rb --lite 1
-            else
-              echo "Setting up the TestApp for Full JIT"
-              ruby setup.rb
-            fi
-            cd "${PROJ_ROOT}/ios/TestApp"
-            # instruments -s -devices
-            if [ "${BUILD_LITE_INTERPRETER}" == 1 ]; then
-              if [ "${USE_COREML_DELEGATE}" == 1 ]; then
-                fastlane scan --only_testing TestAppTests/TestAppTests/testCoreML
-              else
-                fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
-              fi
-            else
-              fastlane scan --only_testing TestAppTests/TestAppTests/testFullJIT
-            fi
-  pytorch_linux_bazel_build:
-    <<: *pytorch_params
-    machine:
-      image: ubuntu-2004:202104-01
-    steps:
-    - checkout
-    - calculate_docker_image_tag
-    - setup_linux_system_environment
-    - setup_ci_environment
-    - run:
-        name: Bazel Build
-        no_output_timeout: "1h"
-        command: |
-          set -e
-          # Pull Docker image and run build
-          echo "DOCKER_IMAGE: "${DOCKER_IMAGE}:${DOCKER_TAG}
-          time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
-          export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
-
-          echo "Do NOT merge main branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
-
-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
-
-          docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
-
-          export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .ci/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
-
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          # Push intermediate Docker image for next phase to use
-          if [ -z "${BUILD_ONLY}" ]; then
-            # Augment our output image name with bazel to avoid collisions
-            output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
-            export COMMIT_DOCKER_IMAGE=$output_image
-            docker commit "$id" ${COMMIT_DOCKER_IMAGE}
-            time docker push ${COMMIT_DOCKER_IMAGE}
-          fi
-
-  pytorch_linux_bazel_test:
-    <<: *pytorch_params
-    machine:
-      image: ubuntu-2004:202104-01
-    steps:
-    - checkout
-    - calculate_docker_image_tag
-    - setup_linux_system_environment
-    - setup_ci_environment
-    - run:
-        name: Test
-        no_output_timeout: "90m"
-        command: |
-          set -e
-          output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
-          export COMMIT_DOCKER_IMAGE=$output_image
-          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
-
-          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
-
-          if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
-            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
-          else
-            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
-          fi
-
-          retrieve_test_reports() {
-            echo "retrieving test reports"
-            docker cp -L $id:/var/lib/jenkins/workspace/bazel-testlogs ./ || echo 'No test reports found!'
-          }
-          trap "retrieve_test_reports" ERR
-
-          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
-            export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .ci/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
-          else
-            export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .ci/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
-          fi
-          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
-
-          retrieve_test_reports
-          docker stats --all --no-stream
-    - store_test_results:
-        path: bazel-testlogs
-
-  pytorch_windows_test_multigpu:
-    machine:
-      image: ubuntu-2004:202104-01
-    steps:
-      - checkout
-      - run:
-          name: Test
-          no_output_timeout: "90m"
-          command: |
-            set -e
-            python3 -m pip install requests
-            python3 ./.circleci/scripts/trigger_azure_pipeline.py
--- a/.circleci/verbatim-sources/job-specs/job-specs-promote.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-promote.yml
@ -1,18 +0,0 @@
-
-  promote_s3:
-    <<: *promote_common
-    steps:
-      - checkout
-      - run:
-          name: Running promote script
-          command: |
-            scripts/release/promote/wheel_to_s3.sh
-
-  promote_conda:
-    <<: *promote_common
-    steps:
-      - checkout
-      - run:
-          name: Running promote script
-          command: |
-            scripts/release/promote/conda_to_conda.sh
--- a/.circleci/verbatim-sources/job-specs/job-specs-setup.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-setup.yml
@ -1,29 +0,0 @@
-
-  setup:
-    docker:
-      - image: circleci/python:3.7.3
-    steps:
-      - checkout
-      - run:
-          name: Save commit message
-          command: git log --format='%B' -n 1 HEAD > .circleci/scripts/COMMIT_MSG
-      # Note [Workspace for CircleCI scripts]
-      # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-      # In the beginning, you wrote your CI scripts in a
-      # .circleci/config.yml file, and life was good.  Your CI
-      # configurations flourished and multiplied.
-      #
-      # Then one day, CircleCI cometh down high and say, "Your YAML file
-      # is too biggeth, it stresses our servers so."  And thus they
-      # asketh us to smite the scripts in the yml file.
-      #
-      # But you can't just put the scripts in the .circleci folder,
-      # because in some jobs, you don't ever actually checkout the
-      # source repository.  Where you gonna get the scripts from?
-      #
-      # Here's how you do it: you persist .circleci/scripts into a
-      # workspace, attach the workspace in your subjobs, and run all
-      # your scripts from there.
-      - persist_to_workspace:
-          root: .
-          paths: .circleci/scripts
--- a/.circleci/verbatim-sources/nightly-binary-build-defaults.yml
+++ b/.circleci/verbatim-sources/nightly-binary-build-defaults.yml
@ -1,51 +0,0 @@
-
-##############################################################################
-# Binary build (nightlies nightly build) defaults
-# The binary builds use the docker executor b/c at time of writing the machine
-# executor is limited to only two cores and is painfully slow (4.5+ hours per
-# GPU build). But the docker executor cannot be run with --runtime=nvidia, and
-# so the binary test/upload jobs must run on a machine executor. The package
-# built in the build job is persisted to the workspace, which the test jobs
-# expect. The test jobs just run a few quick smoke tests (very similar to the
-# second-round-user-facing smoke tests above) and then upload the binaries to
-# their final locations. The upload part requires credentials that should only
-# be available to org-members.
-#
-# binary_checkout MUST be run before other commands here. This is because the
-# other commands are written in .circleci/scripts/*.sh , so the pytorch source
-# code must be downloaded on the machine before they can be run. We cannot
-# inline all the code into this file, since that would cause the yaml size to
-# explode past 4 MB (all the code in the command section is just copy-pasted to
-# everywhere in the .circleci/config.yml file where it appears).
-##############################################################################
-
-# Checks out the Pytorch and Builder repos (always both of them), and places
-# them in the right place depending on what executor we're running on. We curl
-# our .sh file from the interweb to avoid yaml size bloat. Note that many jobs
-# do not need both the pytorch and builder repos, so this is a little wasteful
-# (smoke tests and upload jobs do not need the pytorch repo).
-binary_checkout: &binary_checkout
-  name: Checkout pytorch/builder repo
-  no_output_timeout: "30m"
-  command: .circleci/scripts/binary_checkout.sh
-
-# Parses circleci arguments in a consistent way, essentially routing to the
-# correct pythonXgccXcudaXos build we want
-binary_populate_env: &binary_populate_env
-  name: Set up binary env variables
-  command: .circleci/scripts/binary_populate_env.sh
-
-binary_install_miniconda: &binary_install_miniconda
-  name: Install miniconda
-  no_output_timeout: "1h"
-  command: .circleci/scripts/binary_install_miniconda.sh
-
-# This section is used in the binary_test and smoke_test jobs. It expects
-# 'binary_populate_env' to have populated /home/circleci/project/env and it
-# expects another section to populate /home/circleci/project/ci_test_script.sh
-# with the code to run in the docker
-binary_run_in_docker: &binary_run_in_docker
-  name: Run in docker
-  # This step only runs on circleci linux machine executors that themselves
-  # need to start docker images
-  command: .circleci/scripts/binary_run_in_docker.sh
--- a/.circleci/verbatim-sources/workflows/workflows-nightly-uploads-header.yml
+++ b/.circleci/verbatim-sources/workflows/workflows-nightly-uploads-header.yml
@ -1,8 +0,0 @@
-      #- binary_linux_libtorch_3.6m_cpu_test:
-      #    requires:
-      #      - binary_linux_libtorch_3.6m_cpu_build
-      #- binary_linux_libtorch_3.6m_cu90_test:
-      #    requires:
-      #      - binary_linux_libtorch_3.6m_cu90_build
-
-      # Nightly uploads
--- a/.clang-tidy
+++ b/.clang-tidy
@ -52,6 +52,13 @@ modernize-*,
 -modernize-use-nodiscard,
 performance-*,
 readability-container-size-empty,
+readability-delete-null-pointer,
+readability-duplicate-include,
+readability-misplaced-array-index,
+readability-redundant-function-ptr-dereference,
+readability-redundant-smartptr-get,
+readability-simplify-subscript-expr,
+readability-string-compare,
 '
 HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
 AnalyzeTemporaryDtors: false
--- a/.flake8
+++ b/.flake8
@ -7,9 +7,7 @@ max-line-length = 120
 # C408 ignored because we like the dict keyword argument syntax
 # E501 is not flexible enough, we're using B950 instead
 ignore =
-    E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
-    # fix these lints in the future
-    E275,
+    E203,E305,E402,E501,E721,E741,F405,F841,F999,W503,W504,C408,E302,W291,E303,
    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
    # to line this up with executable bit
    EXE001,
@ -31,6 +29,8 @@ ignore =
    TOR102,
 per-file-ignores =
    __init__.py: F401
+    test/**: F821
+    test/**/__init__.py: F401,F821
    torch/utils/cpp_extension.py: B950
    torchgen/api/types/__init__.py: F401,F403
    torchgen/executorch/api/types/__init__.py: F401,F403
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@ -38,3 +38,5 @@ f70844bec783bfce43c950ccf180dc494e86f2bf
 e6ec0efaf87703c5f889cfc20b29be455885d58d
 # 2023-07-31 [optim][BE] split test file into logical parts: SWA, LR, optim
 a53cda1ddc15336dc1ff0ce1eff2a49cdc5f882e
+# 2024-01-02 clangformat: fused adam #116583
+9dc68d1aa9e554d09344a10fff69f7b50b2d23a0
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -3,6 +3,7 @@ self-hosted-runner:
    - linux.20_04.4x
    - linux.20_04.16x
    - linux.large
+    - linux.large.arc
    - linux.2xlarge
    - linux.4xlarge
    - linux.12xlarge
--- a/.github/actions/filter-test-configs/action.yml
+++ b/.github/actions/filter-test-configs/action.yml
@ -46,7 +46,8 @@ runs:
        retry_wait_seconds: 30
        command: |
          set -eux
-          python3 -m pip install requests==2.26.0 pyyaml==6.0
+          # PyYAML 6.0 doesn't work with MacOS x86 anymore
+          python3 -m pip install requests==2.26.0 pyyaml==6.0.1

    - name: Parse ref
      id: parse-ref
--- a/.github/actions/setup-xpu/action.yml
+++ b/.github/actions/setup-xpu/action.yml
@ -0,0 +1,67 @@
+name: Setup XPU host
+
+description: Set up XPU host for CI
+
+runs:
+  using: composite
+  steps:
+    - name: Clean all stopped docker containers
+      if: always()
+      shell: bash
+      run: |
+        # Prune all stopped containers, unless another runner on this node is already
+        # pruning. A count of 1 presumably means only this grep itself matched.
+        nprune=$(ps -ef | grep -c "docker container prune")
+        if [[ $nprune -eq 1 ]]; then
+          docker container prune -f
+        fi
+
+    - name: Runner health check system info
+      if: always()
+      shell: bash
+      run: |
+        cat /etc/os-release || true
+        cat /etc/apt/sources.list.d/oneAPI.list || true
+        cat /etc/apt/sources.list.d/intel-gpu-jammy.list || true
+        whoami
+
+    - name: Runner health check xpu-smi
+      if: always()
+      shell: bash
+      run: |
+        xpu-smi discovery
+
+    - name: Runner health check GPU count
+      if: always()
+      shell: bash
+      run: |
+        # grep -c exits non-zero on 0 matches; '|| true' lets the check below report it.
+        ngpu=$(xpu-smi discovery | grep -c -E 'Device Name' || true)
+        if [[ $ngpu -eq 0 ]]; then
+          echo "Error: Failed to detect any GPUs on the runner"
+          echo "Please file an issue on pytorch/pytorch reporting the faulty runner. Include a link to the runner logs so the runner can be identified"
+          exit 1
+        fi
+
+    - name: Runner diskspace health check
+      uses: ./.github/actions/diskspace-cleanup
+      if: always()
+
+    - name: Runner health check disconnect on failure
+      if: ${{ failure() }}
+      shell: bash
+      run: |
+        killall runsvc.sh
+
+    - name: Preserve github env variables for use in docker
+      shell: bash
+      run: |
+        env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
+        env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
+
+    - name: XPU set GPU_FLAG
+      shell: bash
+      run: |
+        # Add the host's render group (exact group-name match) for container creation.
+        render_gid=$(grep '^render:' /etc/group | cut -d: -f3)
+        echo "GPU_FLAG=--device=/dev/mem --device=/dev/dri --group-add video --group-add $render_gid" >> "${GITHUB_ENV}"
--- a/.github/actions/teardown-xpu/action.yml
+++ b/.github/actions/teardown-xpu/action.yml
@ -0,0 +1,20 @@
+name: Teardown XPU host
+
+description: Tear down XPU host for CI
+
+runs:
+  using: composite
+  steps:
+    - name: Teardown XPU
+      if: always()
+      shell: bash
+      run: |
+        # Prune all stopped containers, unless another runner on this node is already
+        # pruning. A count of 1 presumably means only this grep itself matched.
+        nprune=$(ps -ef | grep -c "docker container prune")
+        if [[ $nprune -eq 1 ]]; then
+          docker container prune -f
+        fi
+    - name: Runner diskspace health check
+      uses: ./.github/actions/diskspace-cleanup
+      if: always()
--- a/.github/auto_request_review.yml
+++ b/.github/auto_request_review.yml
@ -12,7 +12,6 @@ reviewers:
    symbolic-shapes:
      - symbolic-shapes
      - antoniojkim
-      - wconstab
      - SherlockNoMad
    Chillee:
      - ezyang
--- a/.github/ci_commit_pins/audio.txt
+++ b/.github/ci_commit_pins/audio.txt
@ -1 +1 @@
-6518fa9b2c74e84d7eb1fc6e3eb51e43213f0c05
+e3efbc2d9094685dd2d4ae143853941f82f167af
--- a/.github/ci_commit_pins/vision.txt
+++ b/.github/ci_commit_pins/vision.txt
@ -1 +1 @@
-c1e2095c3a16fbe7db25b9e2f206025488c2c203
+d23430765b5df76cd1267f438f129f51b7d6e3e1
--- a/.github/ci_commit_pins/xla.txt
+++ b/.github/ci_commit_pins/xla.txt
@ -1 +1 @@
-77b968a541b6d3062e81aafcc140dc20808703ae
+e1c94dfa5a74331a376537c23bf74a2c367f24bd
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@ -8,10 +8,6 @@
 - torch/_inductor/**
 - test/inductor/**

-"module: export":
- torch/_export/**
- test/export/**
-
 "ciflow/inductor":
 - torch/_decomp/**
 - torch/_dynamo/**
@ -23,8 +19,9 @@
 - torch/_subclasses/meta_utils.py
 - test/distributed/test_dynamo_distributed.py
 - test/distributed/test_inductor_collectives.py
- torch/_functorch/partitioners.py
+- torch/_functorch/_aot_autograd/**
 - torch/_functorch/aot_autograd.py
+- torch/_functorch/partitioners.py
 - .ci/docker/ci_commit_pins/**
 - .github/ci_commit_pins/**
 - c10/core/Sym*
@ -72,9 +69,13 @@
 "ciflow/trunk":
 - .ci/docker/ci_commit_pins/triton.txt

-"module: distributed":
+"oncall: distributed":
 - torch/csrc/distributed/**
 - torch/distributed/**
 - torch/nn/parallel/**
 - test/distributed/**
 - torch/testing/_internal/distributed/**
+
+"module: distributed_checkpoint":
+- torch/distributed/checkpoint/**
+- test/distributed/checkpoint/**
--- a/.github/merge_rules.yaml
+++ b/.github/merge_rules.yaml
@ -285,6 +285,7 @@
  - yhcharles
  - kiukchung
  - d4l3k
+  - shuqiangzhang
  mandatory_checks_name:
  - EasyCLA
  - Lint
@ -351,16 +352,22 @@
  - Lint
  - pull

- name: x86 CPU quantization
+- name: CPU inductor
  patterns:
-  - torch/ao/quantization/quantizer/x86_inductor_quantizer.py
+  - torch/_inductor/fx_passes/mkldnn_fusion.py
  - torch/_inductor/fx_passes/quantization.py
-  - test/quantization/core/test_quantized_op.py
+  - torch/_inductor/codegen/cpp.py
  - test/inductor/test_mkldnn_pattern_matcher.py
+  - test/inductor/test_cpu_repo.py
+  - test/inductor/test_cpu_cpp_wrapper.py
+  - aten/src/ATen/native/quantized/cpu/**
+  - test/quantization/core/test_quantized_op.py
+  - torch/ao/quantization/quantizer/x86_inductor_quantizer.py
  - test/quantization/pt2e/test_x86inductor_quantizer.py
  approved_by:
  - leslie-fang-intel
  - jgong5
+  - EikanWang
  mandatory_checks_name:
  - EasyCLA
  - Lint
--- a/.github/pytorch-probot.yml
+++ b/.github/pytorch-probot.yml
@ -14,6 +14,7 @@ ciflow_push_tags:
 - ciflow/slow
 - ciflow/trunk
 - ciflow/unstable
+- ciflow/xpu
 retryable_workflows:
 - lint
 - pull
--- a/.github/scripts/build_triton_wheel.py
+++ b/.github/scripts/build_triton_wheel.py
@ -10,6 +10,9 @@ from typing import Optional
 SCRIPT_DIR = Path(__file__).parent
 REPO_DIR = SCRIPT_DIR.parent.parent

+# TODO: Remove me once Triton version is again in sync for vanilla and ROCm
+ROCM_TRITION_VERSION = "2.1.0"
+

 def read_triton_pin(rocm_hash: bool = False) -> str:
    triton_file = "triton.txt" if not rocm_hash else "triton-rocm.txt"
@ -29,25 +32,37 @@ def check_and_replace(inp: str, src: str, dst: str) -> str:
    return inp.replace(src, dst)


-def patch_setup_py(path: Path, *, version: str, name: str = "triton") -> None:
+def patch_setup_py(
+    path: Path,
+    *,
+    version: str,
+    name: str = "triton",
+    expected_version: Optional[str] = None,
+) -> None:
    with open(path) as f:
        orig = f.read()
    # Replace name
    orig = check_and_replace(orig, 'name="triton",', f'name="{name}",')
    # Replace version
+    if not expected_version:
+        expected_version = read_triton_version()
    orig = check_and_replace(
-        orig, f'version="{read_triton_version()}",', f'version="{version}",'
+        orig, f'version="{expected_version}",', f'version="{version}",'
    )
    with open(path, "w") as f:
        f.write(orig)


-def patch_init_py(path: Path, *, version: str) -> None:
+def patch_init_py(
+    path: Path, *, version: str, expected_version: Optional[str] = None
+) -> None:
+    if not expected_version:
+        expected_version = read_triton_version()
    with open(path) as f:
        orig = f.read()
    # Replace version
    orig = check_and_replace(
-        orig, f"__version__ = '{read_triton_version()}'", f'__version__ = "{version}"'
+        orig, f"__version__ = '{expected_version}'", f'__version__ = "{version}"'
    )
    with open(path, "w") as f:
        f.write(orig)
@ -130,7 +145,7 @@ def build_triton(
                cwd=triton_basedir,
                env=env,
            )
-            conda_path = list(Path(tmpdir).glob("linux-64/torchtriton*.bz2"))[0]
+            conda_path = next(iter(Path(tmpdir).glob("linux-64/torchtriton*.bz2")))
            shutil.copy(conda_path, Path.cwd())
            return Path.cwd() / conda_path.name

@ -140,6 +155,7 @@ def build_triton(
        patch_init_py(
            triton_pythondir / "triton" / "__init__.py",
            version=f"{version}",
+            expected_version=ROCM_TRITION_VERSION if build_rocm else None,
        )

        if build_rocm:
@ -148,6 +164,7 @@ def build_triton(
                triton_pythondir / "setup.py",
                name=triton_pkg_name,
                version=f"{version}",
+                expected_version=ROCM_TRITION_VERSION,
            )
            check_call("scripts/amd/setup_rocm_libs.sh", cwd=triton_basedir, shell=True)
            print("ROCm libraries setup for triton installation...")
@ -156,7 +173,7 @@ def build_triton(
            [sys.executable, "setup.py", "bdist_wheel"], cwd=triton_pythondir, env=env
        )

-        whl_path = list((triton_pythondir / "dist").glob("*.whl"))[0]
+        whl_path = next(iter((triton_pythondir / "dist").glob("*.whl")))
        shutil.copy(whl_path, Path.cwd())

        if build_rocm:
--- a/.github/scripts/drci_mocks.json.gz
+++ b/.github/scripts/drci_mocks.json.gz
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@ -16,6 +16,12 @@ from typing import Dict, List, Optional, Tuple
 CUDA_ARCHES = ["11.8", "12.1"]


+CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1"}
+
+
+CUDA_ARCHES_CUDNN_VERSION = {"11.8": "8", "12.1": "8"}
+
+
 ROCM_ARCHES = ["5.6", "5.7"]


@ -24,6 +30,7 @@ CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]

 CPU_AARCH64_ARCH = ["cpu-aarch64"]

+
 PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
    "11.8": (
        "nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "  # noqa: B950
@ -86,7 +93,9 @@ def get_nccl_wheel_version(arch_version: str) -> str:
    requirements = map(
        str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version])
    )
-    return [x for x in requirements if x.startswith("nvidia-nccl-cu")][0].split("==")[1]
+    return next(x for x in requirements if x.startswith("nvidia-nccl-cu")).split("==")[
+        1
+    ]


 def validate_nccl_dep_consistency(arch_version: str) -> None:
--- a/.github/scripts/generate_docker_release_matrix.py
+++ b/.github/scripts/generate_docker_release_matrix.py
@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+"""Generates a matrix for docker releases through github actions
+
+Will output a condensed version of the matrix. Will include the following:
+    * CUDA version short
+    * CUDA full version
+    * CUDNN version short
+    * Image type either runtime or devel
+    * Platform linux/arm64,linux/amd64
+
+"""
+
+import json
+from typing import Dict, List
+
+import generate_binary_build_matrix
+
+DOCKER_IMAGE_TYPES = ["runtime", "devel"]
+
+
+def generate_docker_matrix() -> Dict[str, List[Dict[str, str]]]:
+    ret: List[Dict[str, str]] = []
+    for cuda, version in generate_binary_build_matrix.CUDA_ARCHES_FULL_VERSION.items():
+        for image in DOCKER_IMAGE_TYPES:
+            ret.append(
+                {
+                    "cuda": cuda,
+                    "cuda_full_version": version,
+                    "cudnn_version": generate_binary_build_matrix.CUDA_ARCHES_CUDNN_VERSION[
+                        cuda
+                    ],
+                    "image_type": image,
+                    "platform": "linux/arm64,linux/amd64",
+                }
+            )
+    return {"include": ret}
+
+
+if __name__ == "__main__":
+    build_matrix = generate_docker_matrix()
+    print(json.dumps(build_matrix))
--- a/.github/scripts/gitutils.py
+++ b/.github/scripts/gitutils.py
@ -145,6 +145,16 @@ class GitRepo:
        rc = self._run_git("rev-list", revision_range, "--", ".").strip()
        return rc.split("\n") if len(rc) > 0 else []

+    def branches_containing_ref(
+        self, ref: str, *, include_remote: bool = True
+    ) -> List[str]:
+        rc = (
+            self._run_git("branch", "--remote", "--contains", ref)
+            if include_remote
+            else self._run_git("branch", "--contains", ref)
+        )
+        return [x.strip() for x in rc.split("\n") if x.strip()] if len(rc) > 0 else []
+
    def current_branch(self) -> str:
        return self._run_git("symbolic-ref", "--short", "HEAD").strip()

@ -387,13 +397,28 @@ def _shasum(value: str) -> str:
    return m.hexdigest()


-def are_ghstack_branches_in_sync(repo: GitRepo, head_ref: str) -> bool:
+def is_commit_hash(ref: str) -> bool:
+    "True if ref is hexadecimal number, else false"
+    try:
+        int(ref, 16)
+    except ValueError:
+        return False
+    return True
+
+
+def are_ghstack_branches_in_sync(
+    repo: GitRepo, head_ref: str, base_ref: Optional[str] = None
+) -> bool:
    """Checks that diff between base and head is the same as diff between orig and its parent"""
    orig_ref = re.sub(r"/head$", "/orig", head_ref)
-    base_ref = re.sub(r"/head$", "/base", head_ref)
+    if base_ref is None:
+        base_ref = re.sub(r"/head$", "/base", head_ref)
    orig_diff_sha = _shasum(repo.diff(f"{repo.remote}/{orig_ref}"))
    head_diff_sha = _shasum(
-        repo.diff(f"{repo.remote}/{base_ref}", f"{repo.remote}/{head_ref}")
+        repo.diff(
+            base_ref if is_commit_hash(base_ref) else f"{repo.remote}/{base_ref}",
+            f"{repo.remote}/{head_ref}",
+        )
    )
    return orig_diff_sha == head_diff_sha

--- a/.github/scripts/gql_mocks.json.gz
+++ b/.github/scripts/gql_mocks.json.gz
--- a/.github/scripts/label_utils.py
+++ b/.github/scripts/label_utils.py
@ -44,6 +44,10 @@ def get_last_page_num_from_header(header: Any) -> int:
    # Link info looks like: <https://api.github.com/repositories/65600975/labels?per_page=100&page=2>;
    # rel="next", <https://api.github.com/repositories/65600975/labels?per_page=100&page=3>; rel="last"
    link_info = header["link"]
+    # The docs do not specify that the link header is present for projects with just a few labels,
+    # and in https://github.com/malfet/deleteme/actions/runs/7334565243/job/19971396887 it was absent
+    if link_info is None:
+        return 1
    prefix = "&page="
    suffix = ">;"
    return int(
--- a/.github/scripts/test_trymerge.py
+++ b/.github/scripts/test_trymerge.py
@ -32,7 +32,6 @@ from trymerge import (
    main as trymerge_main,
    MandatoryChecksMissingError,
    MergeRule,
-    PostCommentError,
    RE_GHSTACK_DESC,
    read_merge_rules,
    remove_job_name_suffix,
@ -222,6 +221,31 @@ def mocked_read_merge_rules(repo: Any, org: str, project: str) -> List[MergeRule
    ]


+def mocked_read_merge_rules_approvers(
+    repo: Any, org: str, project: str
+) -> List[MergeRule]:
+    return [
+        MergeRule(
+            name="Core Reviewers",
+            patterns=["*"],
+            approved_by=["1", "2", "3", "4", "5", "6"],
+            mandatory_checks_name=[
+                "Lint",
+                "pull",
+            ],
+        ),
+        MergeRule(
+            name="Core Maintainers",
+            patterns=["*"],
+            approved_by=["1", "2", "malfet"],
+            mandatory_checks_name=[
+                "Lint",
+                "pull",
+            ],
+        ),
+    ]
+
+
 def mocked_read_merge_rules_raise(repo: Any, org: str, project: str) -> List[MergeRule]:
    raise RuntimeError("testing")

@ -287,6 +311,27 @@ class TestTryMerge(TestCase):
            RuntimeError, "testing", lambda: find_matching_merge_rule(pr, repo)
        )

+    @mock.patch(
+        "trymerge.read_merge_rules", side_effect=mocked_read_merge_rules_approvers
+    )
+    def test_match_rules_approvers(self, *args: Any) -> None:
+        "Tests that PR has the necessary approvers"
+        repo = DummyGitRepo()
+
+        pr = GitHubPR("pytorch", "pytorch", 115329)
+        # Test that all potential approvers across all rules are listed if the
+        # PR doesn't have one of them
+        for mock_rule in ["Core Reviewers", "Core Maintainers"]:
+            self.assertRaisesRegex(
+                RuntimeError,
+                mock_rule,
+                lambda: find_matching_merge_rule(pr, repo),
+            )
+
+        pr = GitHubPR("pytorch", "pytorch", 115495)
+        # Test that PR with the correct approvers doesn't raise any exception
+        self.assertTrue(find_matching_merge_rule(pr, repo) is not None)
+
    @mock.patch("trymerge.read_merge_rules", side_effect=mocked_read_merge_rules)
    def test_lint_fails(self, *args: Any) -> None:
        "Tests that PR fails mandatory lint check"
@ -470,20 +515,6 @@ class TestTryMerge(TestCase):

        self.assertEqual(len(changed_files), pr.get_changed_files_count())

-    def test_revert_codev_fails(self, *args: Any) -> None:
-        pr = GitHubPR("pytorch", "pytorch", 91340)
-
-        class GitRepoCoDev(DummyGitRepo):
-            def commit_message(self, ref: str) -> str:
-                return pr.get_body()
-
-        repo = GitRepoCoDev()
-        self.assertRaisesRegex(
-            PostCommentError,
-            "landed via phabricator",
-            lambda: validate_revert(repo, pr, comment_id=1372496233),
-        )
-
    def test_revert_codev_abandoned_diff_succeeds(self, *args: Any) -> None:
        pr = GitHubPR("pytorch", "pytorch", 100652)

--- a/.github/scripts/trymerge.py
+++ b/.github/scripts/trymerge.py
@ -20,7 +20,18 @@ from collections import defaultdict
 from dataclasses import dataclass
 from functools import lru_cache
 from pathlib import Path
-from typing import Any, Callable, cast, Dict, List, NamedTuple, Optional, Pattern, Tuple
+from typing import (
+    Any,
+    Callable,
+    cast,
+    Dict,
+    Iterable,
+    List,
+    NamedTuple,
+    Optional,
+    Pattern,
+    Tuple,
+)
 from warnings import warn

 import yaml
@ -612,19 +623,14 @@ def can_skip_internal_checks(pr: "GitHubPR", comment_id: Optional[int] = None) -
    return comment.author_login == "facebook-github-bot"


-def get_ghstack_prs(
-    repo: GitRepo, pr: "GitHubPR", open_only: bool = True
+def _revlist_to_prs(
+    repo: GitRepo,
+    pr: "GitHubPR",
+    rev_list: Iterable[str],
+    should_skip: Optional[Callable[[int, "GitHubPR"], bool]] = None,
 ) -> List[Tuple["GitHubPR", str]]:
-    """
-    Get the PRs in the stack that are below this PR (inclusive).  Throws error if any of the open PRs are out of sync.
-    @:param open_only: Only return open PRs
-    """
-    assert pr.is_ghstack_pr()
-    entire_stack: List[Tuple[GitHubPR, str]] = []
-    # For ghstack, cherry-pick commits based from origin
-    orig_ref = f"{repo.remote}/{re.sub(r'/head$', '/orig', pr.head_ref())}"
-    rev_list = repo.revlist(f"{pr.default_branch()}..{orig_ref}")
-    for idx, rev in enumerate(reversed(rev_list)):
+    rc: List[Tuple[GitHubPR, str]] = []
+    for idx, rev in enumerate(rev_list):
        msg = repo.commit_message(rev)
        m = RE_PULL_REQUEST_RESOLVED.search(msg)
        if m is None:
@ -635,25 +641,48 @@ def get_ghstack_prs(
            raise RuntimeError(
                f"PR {m.group('number')} resolved to wrong owner/repo pair"
            )
-        stacked_pr_num = int(m.group("number"))
-        if stacked_pr_num != pr.pr_num:
-            stacked_pr = GitHubPR(pr.org, pr.project, stacked_pr_num)
-            if open_only and stacked_pr.is_closed():
-                print(
-                    f"Skipping {idx+1} of {len(rev_list)} PR (#{stacked_pr_num}) as its already been merged"
-                )
-                continue
-            entire_stack.append((stacked_pr, rev))
-        else:
-            entire_stack.append((pr, rev))
+        pr_num = int(m.group("number"))
+        candidate = GitHubPR(pr.org, pr.project, pr_num) if pr_num != pr.pr_num else pr
+        if should_skip is not None and should_skip(idx, candidate):
+            continue
+        rc.append((candidate, rev))
+    return rc
+
+
+def get_ghstack_prs(
+    repo: GitRepo, pr: "GitHubPR", open_only: bool = True
+) -> List[Tuple["GitHubPR", str]]:
+    """
+    Get the PRs in the stack that are below this PR (inclusive).  Throws error if any of the open PRs are out of sync.
+    @:param open_only: Only return open PRs
+    """
+    # For ghstack, cherry-pick commits based from origin
+    orig_ref = f"{repo.remote}/{pr.get_ghstack_orig_ref()}"
+    rev_list = repo.revlist(f"{pr.default_branch()}..{orig_ref}")
+
+    def skip_func(idx: int, candidate: "GitHubPR") -> bool:
+        if not open_only or not candidate.is_closed():
+            return False
+        print(
+            f"Skipping {idx+1} of {len(rev_list)} PR (#{candidate.pr_num}) as its already been merged"
+        )
+        return True
+
+    assert pr.is_ghstack_pr()
+    entire_stack = _revlist_to_prs(repo, pr, reversed(rev_list), skip_func)

    for stacked_pr, rev in entire_stack:
        if stacked_pr.is_closed():
            continue
-        if not are_ghstack_branches_in_sync(repo, stacked_pr.head_ref()):
+        base_ref = stacked_pr.base_ref()
+        if base_ref == pr.default_branch():
+            base_ref = repo.get_merge_base(
+                f"{repo.remote}/{base_ref}", f"{repo.remote}/{stacked_pr.head_ref()}"
+            )
+        if not are_ghstack_branches_in_sync(repo, stacked_pr.head_ref(), base_ref):
            raise RuntimeError(
                f"PR {stacked_pr.pr_num} is out of sync with the corresponding revision {rev} on "
-                + f"branch {orig_ref} that would be merged into main.  "
+                + f"branch {stacked_pr.get_ghstack_orig_ref()} that would be merged into {stacked_pr.default_branch()}.  "
                + "This usually happens because there is a non ghstack change in the PR.  "
                + f"Please sync them and try again (ex. make the changes on {orig_ref} and run ghstack)."
            )
@ -694,6 +723,10 @@ class GitHubPR:
    def is_ghstack_pr(self) -> bool:
        return RE_GHSTACK_HEAD_REF.match(self.head_ref()) is not None

+    def get_ghstack_orig_ref(self) -> str:
+        assert self.is_ghstack_pr()
+        return re.sub(r"/head$", "/orig", self.head_ref())
+
    def is_base_repo_private(self) -> bool:
        return bool(self.info["baseRepository"]["isPrivate"])

@ -1288,6 +1321,9 @@ def find_matching_merge_rule(
        ignore_current_checks=ignore_current_checks,
    )

+    # This keeps the list of all approvers that could stamp the change
+    all_rule_approvers = {}
+
    # PRs can fail multiple merge rules, but it only needs to pass one rule to be approved.
    # If it fails all rules, we need to find the rule that it came closest to passing and report
    # that to the dev.
@ -1331,24 +1367,31 @@ def find_matching_merge_rule(
            continue

        # Does the PR have the required approvals for this rule?
-        rule_approvers_set = set()
+        rule_approvers = set()
        for approver in rule.approved_by:
            if "/" in approver:
                org, name = approver.split("/")
-                rule_approvers_set.update(gh_get_team_members(org, name))
+                rule_approvers.update(gh_get_team_members(org, name))
            else:
-                rule_approvers_set.add(approver)
-        approvers_intersection = approved_by.intersection(rule_approvers_set)
+                rule_approvers.add(approver)
+        approvers_intersection = approved_by.intersection(rule_approvers)
        # If rule requires approvers but they aren't the ones that reviewed PR
-        if len(approvers_intersection) == 0 and len(rule_approvers_set) > 0:
-            if reject_reason_score < 10000:
+        if len(approvers_intersection) == 0 and len(rule_approvers) > 0:
+            # Less than or equal is intentionally used here to gather all potential
+            # approvers
+            if reject_reason_score <= 10000:
                reject_reason_score = 10000
-                reject_reason = "\n".join(
-                    (
-                        "Approval needed from one of the following:",
-                        f"{', '.join(list(rule_approvers_set)[:5])}{', ...' if len(rule_approvers_set) > 5 else ''}",
-                    )
-                )
+
+                all_rule_approvers[rule.name] = rule.approved_by
+                # Prepare the reject reason
+                all_rule_approvers_msg = [
+                    f"- {name} ({', '.join(approved_by[:5])}{', ...' if len(approved_by) > 5 else ''})"
+                    for name, approved_by in all_rule_approvers.items()
+                ]
+
+                reject_reason = "Approvers from one of the following sets are needed:\n"
+                reject_reason += "\n".join(all_rule_approvers_msg)
+
            continue

        # Does the PR pass the checks required by this rule?
@ -1722,6 +1765,16 @@ def filter_checks_with_lambda(
    return [check for check in checks.values() if status_filter(check.status)]


+def get_pr_commit_sha(repo: GitRepo, pr: GitHubPR) -> str:
+    commit_sha = pr.get_merge_commit()
+    if commit_sha is not None:
+        return commit_sha
+    commits = repo.commits_resolving_gh_pr(pr.pr_num)
+    if len(commits) == 0:
+        raise PostCommentError("Can't find any commits resolving PR")
+    return commits[0]
+
+
 def validate_revert(
    repo: GitRepo, pr: GitHubPR, *, comment_id: Optional[int] = None
 ) -> Tuple[str, str]:
@ -1743,32 +1796,98 @@ def validate_revert(
            f"Will not revert as @{author_login} is not one of "
            f"[{', '.join(allowed_reverters)}], but instead is {author_association}."
        )
-    skip_internal_checks = can_skip_internal_checks(pr, comment_id)
-
-    # Ignore associated diff it PR does not have internal changes
-    if pr.has_no_connected_diff():
-        skip_internal_checks = True

    # Raises exception if matching rule is not found, but ignores all status checks
    find_matching_merge_rule(
-        pr, repo, skip_mandatory_checks=True, skip_internal_checks=skip_internal_checks
+        pr, repo, skip_mandatory_checks=True, skip_internal_checks=True
    )
-    commit_sha = pr.get_merge_commit()
-    if commit_sha is None:
-        commits = repo.commits_resolving_gh_pr(pr.pr_num)
-        if len(commits) == 0:
-            raise PostCommentError("Can't find any commits resolving PR")
-        commit_sha = commits[0]
-    msg = repo.commit_message(commit_sha)
-    rc = RE_DIFF_REV.search(msg)
-    if rc is not None and not skip_internal_checks:
-        raise PostCommentError(
-            f"Can't revert PR that was landed via phabricator as {rc.group(1)}.  "
-            + "Please revert by going to the internal diff and clicking Unland."
-        )
+    commit_sha = get_pr_commit_sha(repo, pr)
    return (author_login, commit_sha)


+def get_ghstack_dependent_prs(
+    repo: GitRepo, pr: GitHubPR, only_closed: bool = True
+) -> List[Tuple[str, GitHubPR]]:
+    """
+    Get the PRs in the stack that are above this PR (inclusive).
+    Throws an error if the stack has branched or the original branches are gone
+    """
+    assert pr.is_ghstack_pr()
+    orig_ref = f"{repo.remote}/{pr.get_ghstack_orig_ref()}"
+    rev_list = repo.revlist(f"{pr.default_branch()}..{orig_ref}")
+    if len(rev_list) == 0:
+        raise RuntimeError(
+            f"PR {pr.pr_num} does not have any revisions associated with it"
+        )
+    skip_len = len(rev_list) - 1
+    for branch in repo.branches_containing_ref(orig_ref):
+        candidate = repo.revlist(f"{pr.default_branch()}..{branch}")
+        # Pick longest candidate
+        if len(candidate) > len(rev_list):
+            candidate, rev_list = rev_list, candidate
+        # Validate that candidate always ends rev-list
+        if rev_list[-len(candidate) :] != candidate:
+            raise RuntimeError(
+                f"Branch {branch} revlist {', '.join(candidate)} is not a subset of {', '.join(rev_list)}"
+            )
+    # Remove commits original PR depends on
+    if skip_len > 0:
+        rev_list = rev_list[:-skip_len]
+    rc: List[Tuple[str, GitHubPR]] = []
+    for pr_, sha in _revlist_to_prs(repo, pr, rev_list):
+        if not pr_.is_closed():
+            if not only_closed:
+                rc.append(("", pr_))
+            continue
+        commit_sha = get_pr_commit_sha(repo, pr_)
+        rc.append((commit_sha, pr_))
+    return rc
+
+
+def do_revert_prs(
+    repo: GitRepo,
+    shas_and_prs: List[Tuple[str, GitHubPR]],
+    *,
+    author_login: str,
+    extra_msg: str = "",
+    skip_internal_checks: bool = False,
+    dry_run: bool = False,
+) -> None:
+    # Prepare and push revert commits
+    commit_shas: List[str] = []
+    for commit_sha, pr in shas_and_prs:
+        revert_msg = f"\nReverted {pr.get_pr_url()} on behalf of {prefix_with_github_url(author_login)}"
+        revert_msg += extra_msg
+        repo.checkout(pr.default_branch())
+        repo.revert(commit_sha)
+        msg = repo.commit_message("HEAD")
+        msg = re.sub(RE_PULL_REQUEST_RESOLVED, "", msg)
+        msg += revert_msg
+        repo.amend_commit_message(msg)
+    repo.push(shas_and_prs[0][1].default_branch(), dry_run)
+
+    # Comment/reopen PRs
+    for commit_sha, pr in shas_and_prs:
+        revert_message = (
+            f"@{pr.get_pr_creator_login()} your PR has been successfully reverted."
+        )
+        if (
+            pr.has_internal_changes()
+            and not pr.has_no_connected_diff()
+            and not skip_internal_checks
+        ):
+            revert_message += "\n:warning: This PR might contain internal changes"
+            revert_message += "\ncc: @pytorch/pytorch-dev-infra"
+        gh_post_pr_comment(
+            pr.org, pr.project, pr.pr_num, revert_message, dry_run=dry_run
+        )
+
+        if not dry_run:
+            pr.add_numbered_label("reverted")
+            gh_post_commit_comment(pr.org, pr.project, commit_sha, revert_msg)
+            gh_update_pr_state(pr.org, pr.project, pr.pr_num)
+
+
 def try_revert(
    repo: GitRepo,
    pr: GitHubPR,
@ -1777,34 +1896,37 @@ def try_revert(
    comment_id: Optional[int] = None,
    reason: Optional[str] = None,
 ) -> None:
-    def post_comment(msg: str) -> None:
-        gh_post_pr_comment(pr.org, pr.project, pr.pr_num, msg, dry_run=dry_run)
-
    try:
        author_login, commit_sha = validate_revert(repo, pr, comment_id=comment_id)
    except PostCommentError as e:
-        return post_comment(str(e))
-    revert_msg = f"\nReverted {pr.get_pr_url()} on behalf of {prefix_with_github_url(author_login)}"
-    revert_msg += f" due to {reason}" if reason is not None else ""
-    revert_msg += (
+        gh_post_pr_comment(pr.org, pr.project, pr.pr_num, str(e), dry_run=dry_run)
+        return
+
+    extra_msg = f" due to {reason}" if reason is not None else ""
+    extra_msg += (
        f" ([comment]({pr.get_comment_by_id(comment_id).url}))\n"
        if comment_id is not None
        else "\n"
    )
-    repo.checkout(pr.default_branch())
-    repo.revert(commit_sha)
-    msg = repo.commit_message("HEAD")
-    msg = re.sub(RE_PULL_REQUEST_RESOLVED, "", msg)
-    msg += revert_msg
-    repo.amend_commit_message(msg)
-    repo.push(pr.default_branch(), dry_run)
-    post_comment(
-        f"@{pr.get_pr_creator_login()} your PR has been successfully reverted."
+    shas_and_prs = [(commit_sha, pr)]
+    if pr.is_ghstack_pr():
+        try:
+            shas_and_prs = get_ghstack_dependent_prs(repo, pr)
+            prs_to_revert = " ".join([t[1].get_pr_url() for t in shas_and_prs])
+            print(f"About to stack of PRs: {prs_to_revert}")
+        except Exception as e:
+            print(
+                f"Failed to fetch dependent PRs: {str(e)}, fall over to single revert"
+            )
+
+    do_revert_prs(
+        repo,
+        shas_and_prs,
+        author_login=author_login,
+        extra_msg=extra_msg,
+        dry_run=dry_run,
+        skip_internal_checks=can_skip_internal_checks(pr, comment_id),
    )
-    if not dry_run:
-        pr.add_numbered_label("reverted")
-        gh_post_commit_comment(pr.org, pr.project, commit_sha, revert_msg)
-        gh_update_pr_state(pr.org, pr.project, pr.pr_num)


 def prefix_with_github_url(suffix_str: str) -> str:
--- a/.github/workflows/_android-build-test.yml
+++ b/.github/workflows/_android-build-test.yml
@ -29,6 +29,7 @@ env:

 jobs:
  filter:
+    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
--- a/.github/workflows/_android-full-build-test.yml
+++ b/.github/workflows/_android-full-build-test.yml
@ -29,6 +29,7 @@ env:

 jobs:
  filter:
+    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
--- a/.github/workflows/_bazel-build-test.yml
+++ b/.github/workflows/_bazel-build-test.yml
@ -33,6 +33,7 @@ env:

 jobs:
  filter:
+    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -121,8 +122,6 @@ jobs:
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
          GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
          JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
          REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
          # TODO duplicated
          AWS_DEFAULT_REGION: us-east-1
@ -159,8 +158,6 @@ jobs:
            -e TORCH_CUDA_ARCH_LIST \
            -e OUR_GITHUB_JOB_ID \
            -e CUDA_VERSION \
-            -e PYTORCH_RETRY_TEST_CASES \
-            -e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
--- a/.github/workflows/_buck-build-test.yml
+++ b/.github/workflows/_buck-build-test.yml
@ -15,6 +15,7 @@ defaults:

 jobs:
  filter:
+    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
--- a/.github/workflows/_ios-build-test.yml
+++ b/.github/workflows/_ios-build-test.yml
@ -38,6 +38,7 @@ env:

 jobs:
  filter:
+    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
--- a/.github/workflows/_linux-test.yml
+++ b/.github/workflows/_linux-test.yml
@ -164,8 +164,6 @@ jobs:
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
          TEST_CONFIG: ${{ matrix.config }}
          SHARD_NUMBER: ${{ matrix.shard }}
          NUM_TEST_SHARDS: ${{ matrix.num_shards }}
@ -209,6 +207,7 @@ jobs:
            -e GITHUB_RUN_NUMBER \
            -e GITHUB_RUN_ATTEMPT \
            -e JOB_ID \
+            -e JOB_NAME \
            -e BASE_SHA \
            -e BRANCH \
            -e SHA1 \
@ -219,8 +218,6 @@ jobs:
            -e NUM_TEST_SHARDS \
            -e REENABLED_ISSUES \
            -e CONTINUE_THROUGH_ERROR \
-            -e PYTORCH_RETRY_TEST_CASES \
-            -e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
            -e PR_LABELS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
--- a/.github/workflows/_mac-test-mps.yml
+++ b/.github/workflows/_mac-test-mps.yml
@ -28,6 +28,7 @@ on:

 jobs:
  filter:
+    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
@ -58,7 +59,6 @@ jobs:
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Print runner OS/HW info
-        shell: arch -arch arm64 bash {0}
        run: |
          sysctl machdep.cpu.brand_string kern.osproductversion

@ -69,7 +69,6 @@ jobs:
          quiet-checkout: true

      - name: Clean checkout
-        shell: arch -arch arm64 bash {0}
        run: |
          git clean -fxd

@ -95,12 +94,9 @@ jobs:
          ENV_NAME: conda-test-env-${{ github.run_id }}
          PY_VERS: 3.9
          PR_BODY: ${{ github.event.pull_request.body }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
          CONTINUE_THROUGH_ERROR: ${{ needs.filter.outputs.keep-going }}
          PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
          REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
-        shell: arch -arch arm64 bash {0}
        run: |
          # shellcheck disable=SC1090
          set -ex
--- a/.github/workflows/_mac-test.yml
+++ b/.github/workflows/_mac-test.yml
@ -57,9 +57,11 @@ jobs:
      SHARD_NUMBER: ${{ matrix.shard }}
      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
      PR_BODY: ${{ github.event.pull_request.body }}
-      PYTORCH_RETRY_TEST_CASES: 1
-      PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
    steps:
+      - name: Print runner OS/HW info
+        run: |
+          sysctl machdep.cpu.brand_string kern.osproductversion
+
      - name: Clean up leftover processes on MacOS pet runner
        continue-on-error: true
        run: |
@ -76,8 +78,6 @@ jobs:
            rm -rf "${dir}"
          done

-
-
      - name: Clean up disk space before running MacOS workflow
        uses: pytorch/test-infra/.github/actions/check-disk-space@main

--- a/.github/workflows/_rocm-test.yml
+++ b/.github/workflows/_rocm-test.yml
@ -131,8 +131,6 @@ jobs:
          JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
          CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
          TEST_CONFIG: ${{ matrix.config }}
          SHARD_NUMBER: ${{ matrix.shard }}
@ -172,6 +170,7 @@ jobs:
            -e GITHUB_RUN_NUMBER \
            -e GITHUB_RUN_ATTEMPT \
            -e JOB_ID \
+            -e JOB_NAME \
            -e BRANCH \
            -e SHA1 \
            -e AWS_DEFAULT_REGION \
@ -180,8 +179,6 @@ jobs:
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e REENABLED_ISSUES \
-            -e PYTORCH_RETRY_TEST_CASES \
-            -e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
            -e CONTINUE_THROUGH_ERROR \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
--- a/.github/workflows/_run_android_tests.yml
+++ b/.github/workflows/_run_android_tests.yml
@ -15,6 +15,7 @@ defaults:

 jobs:
  filter:
+    if: github.repository_owner == 'pytorch'
    runs-on: [self-hosted, linux.large]
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
--- a/.github/workflows/_win-test.yml
+++ b/.github/workflows/_win-test.yml
@ -139,8 +139,6 @@ jobs:
          USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
          INSTALL_WINDOWS_SDK: 1
          PYTHON_VERSION: 3.8
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
          CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
          VC_PRODUCT: "BuildTools"
          VC_VERSION: ""
--- a/.github/workflows/_xpu-test.yml
+++ b/.github/workflows/_xpu-test.yml
@ -0,0 +1,269 @@
+# TODO: this looks sort of similar to _linux-test, but there are like a dozen
+# places where you would have to insert an if statement. Probably it's better to
+# just use a different workflow altogether
+
+name: xpu-test
+
+on:
+  workflow_call:
+    inputs:
+      build-environment:
+        required: true
+        type: string
+        description: Top-level label for what's being built/tested.
+      test-matrix:
+        required: true
+        type: string
+        description: JSON description of what test configs to run.
+      docker-image:
+        required: true
+        type: string
+        description: Docker image to run in.
+      sync-tag:
+        required: false
+        type: string
+        default: ""
+        description: |
+          If this is set, our linter will use this to make sure that every other
+          job with the same `sync-tag` is identical.
+      timeout-minutes:
+        required: false
+        type: number
+        default: 300
+        description: |
+          Set the maximum (in minutes) how long the workflow should take to finish
+      tests-to-include:
+        required: false
+        type: string
+        default: ""
+        description: |
+          List of tests to include (empty string implies default list)
+
+env:
+  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  test:
+    # Don't run on forked repos or empty test matrix
+    if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
+    strategy:
+      matrix: ${{ fromJSON(inputs.test-matrix) }}
+      fail-fast: false
+    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
+    runs-on: ${{ matrix.runner }}
+    steps:
+      # [see note: pytorch repo ref]
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+
+      - name: Setup XPU
+        uses: ./.github/actions/setup-xpu
+
+      - name: configure aws credentials
+        id: aws_creds
+        uses: aws-actions/configure-aws-credentials@v1.7.0
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_pytorch_artifacts
+          aws-region: us-east-1
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Calculate docker image
+        id: calculate-docker-image
+        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
+        with:
+          docker-image-name: ${{ inputs.docker-image }}
+
+      - name: Pull docker image
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        with:
+          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
+
+      - name: Start monitoring script
+        # Launches tools.stats.monitor in the background with its output sent to
+        # usage_log.txt, and publishes the background PID as the step output
+        # `monitor-script-pid` so the "Stop monitoring script" step can kill it.
+        # continue-on-error keeps a failure here from failing the job.
+        # NOTE(review): nvidia-ml-py looks carried over from the CUDA workflows;
+        # confirm the monitor still needs it on XPU runners.
+        id: monitor-script
+        shell: bash
+        continue-on-error: true
+        run: |
+          python3 -m pip install psutil==5.9.1 nvidia-ml-py==11.525.84
+          python3 -m tools.stats.monitor > usage_log.txt 2>&1 &
+          echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
+
+      - name: Download build artifacts
+        uses: ./.github/actions/download-build-artifacts
+        with:
+          name: ${{ inputs.build-environment }}
+
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+
+      - name: Get workflow job id
+        id: get-job-id
+        uses: ./.github/actions/get-workflow-job-id
+        if: always()
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Check for keep-going label and re-enabled test issues
+        # This uses the filter-test-configs action because it conveniently
+        # checks for labels and re-enabled test issues.  It does not actually do
+        # any filtering.  All filtering is done in the build step.
+        id: keep-going
+        uses: ./.github/actions/filter-test-configs
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
+
+      - name: Set Test step time
+        # Computes the timeout for the Test step: the same expression used for
+        # the job-level `timeout-minutes`, minus 30 minutes. The result is
+        # exposed as the step output `timeout` and read by the Test step.
+        # Presumably the 30-minute margin leaves time for the post-test steps
+        # before the job itself times out.
+        id: test-timeout
+        shell: bash
+        env:
+          JOB_TIMEOUT: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
+        run: |
+          echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
+
+      - name: Test
+        # Starts the test container in detached mode, records its identifier in
+        # CONTAINER_NAME for the later steps, then installs the built wheel and
+        # runs the test script inside the container.
+        id: test
+        env:
+          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          GITHUB_WORKFLOW: ${{ github.workflow }}
+          GITHUB_JOB: ${{ github.job }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_NUMBER: ${{ github.run_number }}
+          GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
+          JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
+          JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
+          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          PYTORCH_RETRY_TEST_CASES: 1
+          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
+          CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
+          TEST_CONFIG: ${{ matrix.config }}
+          SHARD_NUMBER: ${{ matrix.shard }}
+          NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+          REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
+          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
+          DOCKER_IMAGE: ${{ inputs.docker-image }}
+          XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
+          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
+          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
+          TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
+        timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
+        run: |
+          set -x
+
+          TEST_COMMAND=.ci/pytorch/test.sh
+
+          # A bare `-e VAR` forwards VAR from this step's environment into the
+          # container, so anything the test script should see must be listed
+          # below. JOB_NAME is set in `env:` above and is forwarded next to
+          # JOB_ID, matching the ROCm test workflow.
+          # NOTE(review): `--name="${container_name}"` expands a variable that
+          # is not assigned anywhere earlier in this script; confirm it is
+          # meant to be empty at this point.
+          # detached container should get cleaned up by teardown_ec2_linux
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086,SC2090
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e GITHUB_ACTIONS \
+            -e GITHUB_REPOSITORY \
+            -e GITHUB_WORKFLOW \
+            -e GITHUB_JOB \
+            -e GITHUB_RUN_ID \
+            -e GITHUB_RUN_NUMBER \
+            -e GITHUB_RUN_ATTEMPT \
+            -e JOB_ID \
+            -e JOB_NAME \
+            -e BRANCH \
+            -e SHA1 \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e REENABLED_ISSUES \
+            -e PYTORCH_RETRY_TEST_CASES \
+            -e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
+            -e CONTINUE_THROUGH_ERROR \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
+            -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
+            -e TESTS_TO_INCLUDE \
+            -e ZE_AFFINITY_MASK \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --ulimit core=0 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --shm-size="8g" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            --privileged \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          # save container name for later step
+          echo "CONTAINER_NAME=${container_name}" >> "$GITHUB_ENV"
+          # jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
+          docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
+
+      - name: Save test results
+        if: always()
+        run: |
+          # copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
+          docker exec -t "${{ env.CONTAINER_NAME }}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
+
+      - name: Print remaining test logs
+        shell: bash
+        if: always() && steps.test.conclusion
+        run: |
+          cat test/**/*_toprint.log || true
+
+      - name: Stop monitoring script
+        if: always() && steps.monitor-script.outputs.monitor-script-pid
+        shell: bash
+        continue-on-error: true
+        env:
+          MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
+        run: |
+          kill "$MONITOR_SCRIPT_PID"
+
+      - name: Upload test artifacts
+        uses: ./.github/actions/upload-test-artifacts
+        if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
+        with:
+          use-gha: true
+          file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
+
+      - name: Collect backtraces from coredumps (if any)
+        # For every file under the current directory whose name matches
+        # core.[1-9]*, runs gdb inside the test container and prints a backtrace.
+        # NOTE(review): the Test step starts the container with
+        # `--ulimit core=0`; confirm core files can actually appear here.
+        if: always()
+        run: |
+          # shellcheck disable=SC2156
+          find . -iname "core.[1-9]*" -exec docker exec "${CONTAINER_NAME}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \;
+
+      - name: Stop container before exit
+        if: always()
+        run: |
+          # Workaround for multiple runners on same IDC node
+          docker stop "${{ env.CONTAINER_NAME }}"
+
+      - name: Store Core dumps on GitHub
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: coredumps-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}
+          retention-days: 14
+          if-no-files-found: ignore
+          path: ./**/core.[1-9]*
+
+      - name: Teardown XPU
+        uses: ./.github/actions/teardown-xpu
--- a/.github/workflows/build-triton-wheel.yml
+++ b/.github/workflows/build-triton-wheel.yml
@ -182,7 +182,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        py_vers: [ "3.8", "3.9", "3.10", "3.11" ]
+        py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
    timeout-minutes: 40
    env:
      DOCKER_IMAGE: pytorch/conda-builder:cpu
--- a/.github/workflows/check_mergeability_ghstack.yml
+++ b/.github/workflows/check_mergeability_ghstack.yml
@ -0,0 +1,30 @@
+name: Check mergeability and dependencies for ghstack prs
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+
+jobs:
+  check-regex:
+    runs-on: ubuntu-latest
+    outputs:
+      regex-match: ${{ steps.regex-match.outputs.match }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - id: regex-match
+        uses: actions-ecosystem/action-regex-match@d50fd2e7a37d0e617aea3d7ada663bd56862b9cc
+        with:
+          text: ${{ github.head_ref }}
+          regex: '^(gh/[^/]+/[0-9]+/)head$'
+
+  pr-dependencies-check:
+    needs: check-regex
+    if: ${{ needs.check-regex.outputs.regex-match != '' }}
+    uses: pytorch/test-infra/.github/workflows/pr-dependencies-check.yml@main
+    with:
+      pr_number: ${{ github.event.pull_request.number }}
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@ -47,6 +47,7 @@ jobs:
          - docker-image-name: pytorch-linux-focal-py3-clang9-android-ndk-r21e
          - docker-image-name: pytorch-linux-jammy-py3.8-gcc11
          - docker-image-name: pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks
+          - docker-image-name: pytorch-linux-jammy-xpu-2024.0-py3
          - docker-image-name: pytorch-linux-jammy-py3-clang15-asan
          - docker-image-name: pytorch-linux-focal-py3-clang10-onnx
          - docker-image-name: pytorch-linux-focal-linter
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@ -26,25 +26,42 @@ env:
  DOCKER_REGISTRY: ghcr.io
  NO_BUILD_SUFFIX: true
  USE_BUILDX: 1
-  WITH_PUSH: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/'))) }}
+  WITH_PUSH: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v')) }}

 jobs:
+  generate-matrix:
+    if: github.repository_owner == 'pytorch'
+    runs-on: [self-hosted, linux.large]
+    outputs:
+      matrix: ${{ steps.generate-matrix.outputs.matrix }}
+    steps:
+      - name: Checkout PyTorch
+        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
+        with:
+          fetch-depth: 1
+          submodules: true
+      - name: Get docker release matrix
+        id: generate-matrix
+        run: |
+          MATRIX_BLOB="$(python3 .github/scripts/generate_docker_release_matrix.py)"
+          echo "${MATRIX_BLOB}"
+          echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}"
+
  build:
    if: ${{ github.repository == 'pytorch/pytorch' }}
    runs-on: [self-hosted, linux.2xlarge]
-    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
+    environment: ${{ (github.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
    timeout-minutes: 240
+    needs: generate-matrix
    strategy:
-      matrix:
-        include:
-          # nvidia specific images don't exist for arm64 so only build the runtime image
-          - image_type: runtime
-            platform: linux/arm64,linux/amd64
-          - image_type: devel
-            platform: linux/amd64
+      matrix: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
+      fail-fast: false
    env:
      BUILD_IMAGE_TYPE: ${{ matrix.image_type }}
      BUILD_PLATFORMS: ${{ matrix.platform }}
+      CUDA_VERSION: ${{ matrix.cuda_full_version }}
+      CUDA_VERSION_SHORT: ${{ matrix.cuda }}
+      CUDNN_VERSION: ${{ matrix.cudnn_version }}
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
@ -97,10 +114,11 @@ jobs:
      - name: Push nightly tags
        if: ${{ github.event.ref == 'refs/heads/nightly' && matrix.image_type == 'runtime' }}
        run: |
-          PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-runtime"
-          CUDA_VERSION=$(python3 -c "import re;print(re.search('CUDA_VERSION\s+=\s+([0-9\.]+)',open('docker.Makefile').read())[1],end='')")
+          PYTORCH_DOCKER_TAG="${PYTORCH_VERSION}-cuda${CUDA_VERSION_SHORT}-cudnn${CUDNN_VERSION}-runtime"
+
          PYTORCH_NIGHTLY_COMMIT=$(docker run ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
                                          python -c 'import torch; print(torch.version.git_version[:7],end="")')
+
          docker tag ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_DOCKER_TAG}" \
                 ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
          docker push ghcr.io/pytorch/pytorch-nightly:"${PYTORCH_NIGHTLY_COMMIT}-cu${CUDA_VERSION}"
--- a/.github/workflows/inductor.yml
+++ b/.github/workflows/inductor.yml
@ -4,6 +4,7 @@ on:
  push:
    branches:
      - main
+      - release/*
    tags:
      - ciflow/inductor/*
  workflow_dispatch:
@ -13,6 +14,26 @@ concurrency:
  cancel-in-progress: true

 jobs:
+  linux-focal-rocm5_7-py3_8-inductor-build:
+    name: rocm5.7-py3.8-inductor
+    uses: ./.github/workflows/_linux-build.yml
+    with:
+      build-environment: linux-focal-rocm5.7-py3.8
+      docker-image-name: pytorch-linux-focal-rocm-n-py3
+      test-matrix: |
+        { include: [
+          { config: "inductor", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.2" },
+        ]}
+
+  linux-focal-rocm5_7-py3_8-inductor-test:
+    name: rocm5.7-py3.8-inductor
+    uses: ./.github/workflows/_rocm-test.yml
+    needs: linux-focal-rocm5_7-py3_8-inductor-build
+    with:
+      build-environment: linux-focal-rocm5.7-py3.8
+      docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-inductor-build.outputs.docker-image }}
+      test-matrix:  ${{ needs.linux-focal-rocm5_7-py3_8-inductor-build.outputs.test-matrix }}
+
  linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
    name: cuda12.1-py3.10-gcc9-sm86
    uses: ./.github/workflows/_linux-build.yml
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@ -228,8 +228,8 @@ jobs:
          pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/
      - name: Run run_test.py (nonretryable)
        run: |
-          # Run test_weak, which is very fast
-          python3 test/run_test.py --include test_weak --verbose
+          # Run test_vulkan, which is a fast noop on Linux
+          python3 test/run_test.py --include test_vulkan --verbose

  test_collect_env:
    if: ${{ github.repository == 'pytorch/pytorch' }}
--- a/.github/workflows/mac-mps.yml
+++ b/.github/workflows/mac-mps.yml
@ -28,8 +28,7 @@ jobs:
      test-matrix: |
        { include: [
          { config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-12" },
-          # TODO: Revert me when those runners are back online
-          # { config: "mps", shard: 1, num_shards: 1, runner: "macos-m1-13" },
+          { config: "mps", shard: 1, num_shards: 1, runner: "macos-m2-14" },
        ]}

  macos-12-py3-arm64-mps-test:
--- a/.github/workflows/periodic.yml
+++ b/.github/workflows/periodic.yml
@ -12,6 +12,8 @@ on:
  push:
    tags:
      - ciflow/periodic/*
+    branches:
+      - release/*
  workflow_dispatch:

 concurrency:
@ -156,34 +158,6 @@ jobs:
          { config: "default", shard: 1, num_shards: 1, runner: "ubuntu-latest" },
        ]}

-  macos-12-py3-x86-64-build:
-    name: macos-12-py3-x86-64
-    if: github.event_name != 'schedule' || github.event.schedule == '45 4,12,20 * * 1-5' || github.event.schedule == '45 12 * * 0,6' || github.event.schedule == '29 8 * * *'
-
-    uses: ./.github/workflows/_mac-build.yml
-    with:
-      build-environment: macos-12-py3-x86-64
-      xcode-version: "13.3.1"
-      runner-type: macos-12-xl
-      build-generates-artifacts: true
-      sccache-use-gha: true
-      test-matrix: |
-        { include: [
-          { config: "default", shard: 1, num_shards: 4, runner: "macos-12" },
-          { config: "default", shard: 2, num_shards: 4, runner: "macos-12" },
-          { config: "default", shard: 3, num_shards: 4, runner: "macos-12" },
-          { config: "default", shard: 4, num_shards: 4, runner: "macos-12" },
-        ]}
-
-  macos-12-py3-x86-64-test:
-    name: macos-12-py3-x86-64
-    uses: ./.github/workflows/_mac-test.yml
-    needs: macos-12-py3-x86-64-build
-    with:
-      build-environment: macos-12-py3-x86-64
-      test-matrix: ${{ needs.macos-12-py3-x86-64-build.outputs.test-matrix }}
-      arch: x86_64
-
  android-emulator-build-test:
    name: android-emulator-build-test
    uses: ./.github/workflows/_run_android_tests.yml
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@ -136,8 +136,13 @@ jobs:
          { config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
          { config: "crossref", shard: 1, num_shards: 2, runner: "linux.2xlarge" },
          { config: "crossref", shard: 2, num_shards: 2, runner: "linux.2xlarge" },
-          { config: "dynamo", shard: 1, num_shards: 2, runner: "linux.2xlarge" },
-          { config: "dynamo", shard: 2, num_shards: 2, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 1, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 2, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 3, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 4, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 5, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 6, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 7, num_shards: 7, runner: "linux.2xlarge" },
        ]}

  linux-focal-py3_8-clang10-test:
@ -162,8 +167,13 @@ jobs:
          { config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
          { config: "crossref", shard: 1, num_shards: 2, runner: "linux.2xlarge" },
          { config: "crossref", shard: 2, num_shards: 2, runner: "linux.2xlarge" },
-          { config: "dynamo", shard: 1, num_shards: 2, runner: "linux.2xlarge" },
-          { config: "dynamo", shard: 2, num_shards: 2, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 1, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 2, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 3, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 4, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 5, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 6, num_shards: 7, runner: "linux.2xlarge" },
+          { config: "dynamo", shard: 7, num_shards: 7, runner: "linux.2xlarge" },
        ]}

  linux-focal-py3_11-clang10-test:
--- a/.github/workflows/rocm.yml
+++ b/.github/workflows/rocm.yml
@ -25,9 +25,12 @@ jobs:
      sync-tag: rocm-build
      test-matrix: |
        { include: [
-          { config: "default", shard: 1, num_shards: 3, runner: "linux.rocm.gpu" },
-          { config: "default", shard: 2, num_shards: 3, runner: "linux.rocm.gpu" },
-          { config: "default", shard: 3, num_shards: 3, runner: "linux.rocm.gpu" },
+          { config: "default", shard: 1, num_shards: 6, runner: "linux.rocm.gpu.2" },
+          { config: "default", shard: 2, num_shards: 6, runner: "linux.rocm.gpu.2" },
+          { config: "default", shard: 3, num_shards: 6, runner: "linux.rocm.gpu.2" },
+          { config: "default", shard: 4, num_shards: 6, runner: "linux.rocm.gpu.2" },
+          { config: "default", shard: 5, num_shards: 6, runner: "linux.rocm.gpu.2" },
+          { config: "default", shard: 6, num_shards: 6, runner: "linux.rocm.gpu.2" },
        ]}

  linux-focal-rocm5_7-py3_8-test:
--- a/.github/workflows/slow.yml
+++ b/.github/workflows/slow.yml
@ -10,6 +10,8 @@ on:
  push:
    tags:
      - ciflow/slow/*
+    branches:
+      - release/*
  workflow_dispatch:

 concurrency:
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@ -16,11 +16,12 @@ on:
  schedule:
    # Run hourly.
    - cron: 30 * * * *
+  workflow_dispatch:

 jobs:
  stale:
    if: ${{ github.repository == 'pytorch/pytorch' }}
-    runs-on: ubuntu-latest
+    runs-on: linux.large.arc

    steps:
      - uses: actions/github-script@v6
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@ -195,4 +195,4 @@ jobs:
      build-environment: linux-focal-rocm5.7-py3.8
      docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.test-matrix }}
-      tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd"
+      tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd inductor/test_torchinductor"
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .1.0
 .2.0