Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-27 09:04:53 +08:00)

Compare commits: forpull1 ... v1.12.0-rc (84 commits)
| SHA1 |
| --- |
| 67ece03c8c |
| bcfb424768 |
| 8186aa7d6c |
| 01d9324fe1 |
| 5009086150 |
| bfb6b24575 |
| 681a6e381c |
| 92437c6b4e |
| 566286f9db |
| ac3086120d |
| 7964022214 |
| 1d5ecdb3b9 |
| 7eef782636 |
| fa01ea406a |
| 21e1282098 |
| 2d3d6f9d05 |
| da93b1cbeb |
| d67c72cb53 |
| ef26f13df9 |
| 4a9779aa4d |
| 9a94ddc081 |
| dee3dc6070 |
| 30fce6836f |
| 0f93212516 |
| 585417e935 |
| bd93fe635e |
| cc6e2d3035 |
| 127922d451 |
| 4c3742be4b |
| f12a1ff7f9 |
| f913b4d9fb |
| 9229e451b2 |
| d064733915 |
| 9d67727edf |
| ec86ed25e9 |
| 2deba51e72 |
| e9a12ec87f |
| 2a8e3ee91e |
| 47d558e862 |
| bc0a9abad2 |
| fa7d872ce3 |
| d1d2be89fd |
| 0e58e3374e |
| e3e753161c |
| dc2b2f09d7 |
| 19ebdd7eab |
| f8160b113e |
| 3e8119bf9a |
| 6660df9f22 |
| 8b7e19a87b |
| 9828013233 |
| 53fc6dc3db |
| 52435c6b1f |
| 9a66061326 |
| eef0ec541e |
| 0ffefea581 |
| 7e12cfb29d |
| 24b9bd4398 |
| 5342e76039 |
| 08d70ab718 |
| 207bde1ee8 |
| 51428a8f43 |
| c40f18454d |
| 8a5156a050 |
| 04d75d2008 |
| 2652da29ab |
| aa8911885b |
| 528710ec89 |
| de53f70e1d |
| 39ebb3e06e |
| fd3cc823ce |
| 5bb7c617f6 |
| 8a627381c9 |
| f56e16a70f |
| c93a7f8bea |
| 919b53c5e7 |
| 2ad18abc49 |
| 9596b999f8 |
| baabb4cb96 |
| 906a6e1df9 |
| 974f7f8080 |
| 8abf37d74e |
| 8ff2bc0c01 |
| a119b7f6d4 |
@@ -1,3 +0,0 @@
```
# We do not use this library in our Bazel build. It contains an
# infinitely recursing symlink that makes Bazel very unhappy.
third_party/ittapi/
```
.bazelrc
```diff
@@ -1,4 +1,4 @@
-build --cxxopt=--std=c++17
+build --cxxopt=--std=c++14
 build --copt=-I.
 # Bazel does not support including its cc_library targets as system
 # headers. We work around this for generated code
@@ -13,102 +13,15 @@ build:no-tty --curses no
 build:no-tty --progress_report_interval 10
 build:no-tty --show_progress_rate_limit 10
 
-# Build with GPU support by default.
-build --define=cuda=true
-# rules_cuda configuration
-build --@rules_cuda//cuda:enable_cuda
-build --@rules_cuda//cuda:cuda_targets=sm_52
-build --@rules_cuda//cuda:compiler=nvcc
-build --repo_env=CUDA_PATH=/usr/local/cuda
-
-# Configuration to build without GPU support
-build:cpu-only --define=cuda=false
+# Configuration to build with GPU support
+build:gpu --define=cuda=true
 # define a separate build folder for faster switching between configs
-build:cpu-only --platform_suffix=-cpu-only
+build:gpu --platform_suffix=-gpu
 # See the note on the config-less build for details about why we are
-# doing this. We must also do it for the "-cpu-only" platform suffix.
-build --copt=-isystem --copt=bazel-out/k8-fastbuild-cpu-only/bin
+# doing this. We must also do it for the "-gpu" platform suffix.
+build --copt=-isystem --copt=bazel-out/k8-fastbuild-gpu/bin
 # rules_cuda configuration
-build:cpu-only --@rules_cuda//cuda:enable_cuda=False
+build:gpu --@rules_cuda//cuda:enable_cuda
+build:gpu --@rules_cuda//cuda:cuda_targets=sm_52
+build:gpu --@rules_cuda//cuda:compiler=nvcc
+build:gpu --repo_env=CUDA_PATH=/usr/local/cuda
-
-# Definition of --config=shell
-# interactive shell immediately before execution
-build:shell --run_under="//tools/bazel_tools:shellwrap"
-
-# Disable all warnings for external repositories. We don't care about
-# their warnings.
-build --per_file_copt=^external/@-w
-
-# Set additional warnings to error level.
-#
-# Implementation notes:
-# * we use file extensions to determine if we are using the C++
-# compiler or the cuda compiler
-# * we use ^// at the start of the regex to only permit matching
-# PyTorch files. This excludes external repos.
-#
-# Note that because this is logically a command-line flag, it is
-# considered the word on what warnings are enabled. This has the
-# unfortunate consequence of preventing us from disabling an error at
-# the target level because those flags will come before these flags in
-# the action invocation. Instead we provide per-file exceptions after
-# this.
-#
-# On the bright side, this means we don't have to more broadly apply
-# the exceptions to an entire target.
-#
-# Looking for CUDA flags? We have a cu_library macro that we can edit
-# directly. Look in //tools/rules:cu.bzl for details. Editing the
-# macro over this has the following advantages:
-# * making changes does not require discarding the Bazel analysis
-# cache
-# * it allows for selective overrides on individual targets since the
-# macro-level opts will come earlier than target level overrides
-
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=all
-# The following warnings come from -Wall. We downgrade them from error
-# to warnings here.
-#
-# We intentionally use #pragma unroll, which is compiler specific.
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-error=unknown-pragmas
-
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=extra
-# The following warnings come from -Wextra. We downgrade them from error
-# to warnings here.
-#
-# unused-parameter-compare has a tremendous amount of violations in the
-# codebase. It will be a lot of work to fix them, just disable it for
-# now.
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-parameter
-# missing-field-parameters has both a large number of violations in
-# the codebase, but it also is used pervasively in the Python C
-# API. There are a couple of catches though:
-# * we use multiple versions of the Python API and hence have
-# potentially multiple different versions of each relevant
-# struct. They may have different numbers of fields. It will be
-# unwieldy to support multiple versions in the same source file.
-# * Python itself for many of these structs recommends only
-# initializing a subset of the fields. We should respect the API
-# usage conventions of our dependencies.
-#
-# Hence, we just disable this warning altogether. We may want to clean
-# up some of the clear-cut cases that could be risky, but we still
-# likely want to have this disabled for the most part.
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-missing-field-initializers
-
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-function
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-variable
-
-build --per_file_copt='//:aten/src/ATen/RegisterCompositeExplicitAutograd\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterCompositeImplicitAutograd\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterMkldnnCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterQuantizedCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterSparseCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterSparseCsrCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorMeta\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterSparseMeta\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterQuantizedMeta\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterZeroTensor\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:torch/csrc/lazy/generated/RegisterAutogradLazy\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:torch/csrc/lazy/generated/RegisterLazy\.cpp$'@-Wno-error=unused-function
```
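The removed comments above describe how per-file warning exceptions work: `--per_file_copt` takes a `regex@option` pair, and exceptions must come after the blanket `-Werror` lines because later flags win. A minimal sketch of adding one more exception in that style, assuming a hypothetical generated file path:

```bash
# Sketch: append one more per-file warning exception after the -Werror lines.
# The file path is illustrative only; the regex@option syntax is the one used
# by the .bazelrc shown above.
cat >> .bazelrc <<'EOF'
build --per_file_copt='//:aten/src/ATen/RegisterCPU\.cpp$'@-Wno-error=unused-function
EOF
```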
```diff
@@ -1 +1 @@
-6.1.1
+4.2.1
```
```diff
@@ -1,26 +1,15 @@
-[pt]
-is_oss=1
-
 [buildfile]
-name = BUCK.oss
-includes = //tools/build_defs/select.bzl
+name = BUILD.buck
 
 [repositories]
 bazel_skylib = third_party/bazel-skylib/
-ovr_config = .
 
 [download]
 in_build = true
 
 [cxx]
 cxxflags = -std=c++17
-ldflags = -Wl,--no-undefined
 should_remap_host_platform = true
-cpp = /usr/bin/clang
-cc = /usr/bin/clang
-cxx = /usr/bin/clang++
-cxxpp = /usr/bin/clang++
-ld = /usr/bin/clang++
 
 [project]
 default_flavors_mode=all
```
@@ -1,36 +0,0 @@
```bash
set -ex

LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
TEST_DIR="$ROOT_DIR/test"
gtest_reports_dir="${TEST_DIR}/test-reports/cpp"
pytest_reports_dir="${TEST_DIR}/test-reports/python"

# Figure out which Python to use
PYTHON="$(which python)"
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
  PYTHON=$(which "python${BASH_REMATCH[1]}")
fi

if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
  # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
  unset HIP_PLATFORM
  if which sccache > /dev/null; then
    # Save sccache logs to file
    sccache --stop-server || true
    rm -f ~/sccache_error.log || true
    SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server

    # Report sccache stats for easier debugging
    sccache --zero-stats
  fi
fi

# /usr/local/caffe2 is where the cpp bits are installed to in cmake-only
# builds. In +python builds the cpp tests are copied to /usr/local/caffe2 so
# that the test code in .ci/test.sh is the same
INSTALL_PREFIX="/usr/local/caffe2"

mkdir -p "$gtest_reports_dir" || true
mkdir -p "$pytest_reports_dir" || true
mkdir -p "$INSTALL_PREFIX" || true
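The Python-picker above relies on a bash regex and `BASH_REMATCH` to pull the interpreter version out of the job name. A minimal check of that behavior, using a made-up `BUILD_ENVIRONMENT` value:

```bash
# The regex is copied from the script above; the job name is illustrative.
BUILD_ENVIRONMENT="pytorch-linux-focal-py3.8-gcc9"
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
  echo "would use: python${BASH_REMATCH[1]}"   # prints: would use: python3.8
fi
```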
@@ -1,172 +0,0 @@
```bash
#!/bin/bash

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
  pip install click mock tabulate networkx==2.0
  pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
fi

# Skip tests in environments where they are not built/applicable
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
  echo 'Skipping tests'
  exit 0
fi
if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then
  # temporary to locate some kernel issues on the CI nodes
  export HSAKMT_DEBUG_LEVEL=4
fi
# These additional packages are needed for circleci ROCm builds.
if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
  # Need networkx 2.0 because bellmand_ford was moved in 2.1 . Scikit-image by
  # defaults installs the most recent networkx version, so we install this lower
  # version explicitly before scikit-image pulls it in as a dependency
  pip install networkx==2.0
  # click - onnx
  pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
fi

# Find where cpp tests and Caffe2 itself are installed
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
  # For cmake only build we install everything into /usr/local
  cpp_test_dir="$INSTALL_PREFIX/cpp_test"
  ld_library_path="$INSTALL_PREFIX/lib"
else
  # For Python builds we install into python
  # cd to /usr first so the python import doesn't get confused by any 'caffe2'
  # directory in cwd
  python_installation="$(dirname $(dirname $(cd /usr && $PYTHON -c 'import os; import caffe2; print(os.path.realpath(caffe2.__file__))')))"
  caffe2_pypath="$python_installation/caffe2"
  cpp_test_dir="$python_installation/torch/test"
  ld_library_path="$python_installation/torch/lib"
fi

################################################################################
# C++ tests #
################################################################################
# Only run cpp tests in the first shard, don't run cpp tests a second time in the second shard
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
  echo "Running C++ tests.."
  for test in $(find "$cpp_test_dir" -executable -type f); do
    case "$test" in
      # skip tests we know are hanging or bad
      */mkl_utils_test|*/aten/integer_divider_test)
        continue
        ;;
      */scalar_tensor_test|*/basic|*/native_test)
        if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
          continue
        else
          LD_LIBRARY_PATH="$ld_library_path" "$test"
        fi
        ;;
      */*_benchmark)
        LD_LIBRARY_PATH="$ld_library_path" "$test" --benchmark_color=false
        ;;
      *)
        # Currently, we use a mixture of gtest (caffe2) and Catch2 (ATen). While
        # planning to migrate to gtest as the common PyTorch c++ test suite, we
        # currently do NOT use the xml test reporter, because Catch doesn't
        # support multiple reporters
        # c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223
        # which means that enabling XML output means you lose useful stdout
        # output for Jenkins. It's more important to have useful console
        # output than it is to have XML output for Jenkins.
        # Note: in the future, if we want to use xml test reporter once we switch
        # to all gtest, one can simply do:
        LD_LIBRARY_PATH="$ld_library_path" \
          "$test" --gtest_output=xml:"$gtest_reports_dir/$(basename $test).xml"
        ;;
    esac
  done
fi

################################################################################
# Python tests #
################################################################################
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
  exit 0
fi

# If pip is installed as root, we must use sudo.
# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
PIP=$(which pip)
PIP_USER=$(stat --format '%U' $PIP)
CURRENT_USER=$(id -u -n)
if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
  MAYBE_SUDO=sudo
fi

# Uninstall pre-installed hypothesis and coverage to use an older version as newer
# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
$MAYBE_SUDO pip -q uninstall -y hypothesis
$MAYBE_SUDO pip -q uninstall -y coverage

# "pip install hypothesis==3.44.6" from official server is unreliable on
# CircleCI, so we host a copy on S3 instead
$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
$MAYBE_SUDO pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl

# Collect additional tests to run (outside caffe2/python)
EXTRA_TESTS=()

# CUDA builds always include NCCL support
if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
  EXTRA_TESTS+=("$caffe2_pypath/contrib/nccl")
fi

rocm_ignore_test=()
if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
  # Currently these tests are failing on ROCM platform:

  # On ROCm, RCCL (distributed) development isn't complete.
  # https://github.com/ROCmSoftwarePlatform/rccl
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/data_parallel_model_test.py")

  # This test has been flaky in ROCm CI (but note the tests are
  # cpu-only so should be unrelated to ROCm)
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
  # This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/convfusion_op_test.py")
  # This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/pool_op_test.py")
fi

echo "Running Python tests.."
# locale setting is required by click package
for loc in "en_US.utf8" "C.UTF-8"; do
  if locale -a | grep "$loc" >/dev/null 2>&1; then
    export LC_ALL="$loc"
    export LANG="$loc"
    break;
  fi
done

# Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
export DNNL_MAX_CPU_ISA=AVX2

# Should still run even in the absence of SHARD_NUMBER
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
  # TODO(sdym@meta.com) remove this when the linked issue resolved.
  # py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed
  pip install --user py==1.11.0
  pip install --user pytest-sugar
  # NB: Warnings are disabled because they make it harder to see what
  # the actual erroring test is
  "$PYTHON" \
    -m pytest \
    -x \
    -v \
    --disable-warnings \
    --junit-xml="$pytest_reports_dir/result.xml" \
    --ignore "$caffe2_pypath/python/test/executor_test.py" \
    --ignore "$caffe2_pypath/python/operator_test/matmul_op_test.py" \
    --ignore "$caffe2_pypath/python/operator_test/pack_ops_test.py" \
    --ignore "$caffe2_pypath/python/mkl/mkl_sbn_speed_test.py" \
    --ignore "$caffe2_pypath/python/trt/test_pt_onnx_trt.py" \
    ${rocm_ignore_test[@]} \
    "$caffe2_pypath/python" \
    "${EXTRA_TESTS[@]}"
fi
```
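The script above skips ROCm-specific failures by accumulating `--ignore <path>` pairs in an array and splicing them, unquoted, into the pytest command line. A small sketch of that expansion, with an assumed installation path:

```bash
# caffe2_pypath is illustrative; the array/expansion pattern mirrors the script above.
caffe2_pypath="/usr/lib/python3/dist-packages/caffe2"
rocm_ignore_test=("--ignore $caffe2_pypath/python/ideep/pool_op_test.py")
# Unquoted expansion splits each entry into "--ignore" and the path, which is what pytest expects.
echo python -m pytest ${rocm_ignore_test[@]} "$caffe2_pypath/python"
```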
@@ -1,32 +0,0 @@

# Docker images for GitHub CI

This directory contains everything needed to build the Docker images
that are used in our CI.

The Dockerfiles located in subdirectories are parameterized to
conditionally run build stages depending on build arguments passed to
`docker build`. This lets us use only a few Dockerfiles for many
images. The different configurations are identified by a freeform
string that we call a _build environment_. This string is persisted in
each image as the `BUILD_ENVIRONMENT` environment variable.

See `build.sh` for valid build environments (it's the giant switch).

## Contents

* `build.sh` -- dispatch script to launch all builds
* `common` -- scripts used to execute individual Docker build stages
* `ubuntu` -- Dockerfile for Ubuntu image for CPU build and test jobs
* `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
* `ubuntu-rocm` -- Dockerfile for Ubuntu image with ROCm support
* `ubuntu-xpu` -- Dockerfile for Ubuntu image with XPU support

## Usage

```bash
# Build a specific image
./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest

# Set flags (see build.sh) and build image
sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
```
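Since the README says the build-environment string is persisted in each image as `BUILD_ENVIRONMENT`, one quick way to confirm which configuration an image was built with (using the `myimage:latest` tag from the usage example above) is:

```bash
# Print the build environment baked into an image built by build.sh
docker run --rm myimage:latest bash -c 'echo "$BUILD_ENVIRONMENT"'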
@@ -1,462 +0,0 @@
```bash
#!/bin/bash

set -ex

image="$1"
shift

if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGE"
  exit 1
fi

function extract_version_from_image_name() {
  eval export $2=$(echo "${image}" | perl -n -e"/$1(\d+(\.\d+)?(\.\d+)?)/ && print \$1")
  if [ "x${!2}" = x ]; then
    echo "variable '$2' not correctly parsed from image='$image'"
    exit 1
  fi
}

function extract_all_from_image_name() {
  # parts $image into array, splitting on '-'
  keep_IFS="$IFS"
  IFS="-"
  declare -a parts=($image)
  IFS="$keep_IFS"
  unset keep_IFS

  for part in "${parts[@]}"; do
    name=$(echo "${part}" | perl -n -e"/([a-zA-Z]+)\d+(\.\d+)?(\.\d+)?/ && print \$1")
    vername="${name^^}_VERSION"
    # "py" is the odd one out, needs this special case
    if [ "x${name}" = xpy ]; then
      vername=ANACONDA_PYTHON_VERSION
    fi
    # skip non-conforming fields such as "pytorch", "linux" or "bionic" without version string
    if [ -n "${name}" ]; then
      extract_version_from_image_name "${name}" "${vername}"
    fi
  done
}
```
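The two helpers above parse versioned fields out of an image name with a perl regex and export them as `*_VERSION` variables. A standalone sketch of what they recover from one of the tags used later in the case statement (the perl one-liners are copied from the functions above):

```bash
# Illustrative image name; the regexes mirror extract_version_from_image_name.
image="pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9"
echo "$image" | perl -n -e'/cuda(\d+(\.\d+)?(\.\d+)?)/ && print "CUDA_VERSION=$1\n"'   # CUDA_VERSION=11.8
echo "$image" | perl -n -e'/gcc(\d+(\.\d+)?(\.\d+)?)/ && print "GCC_VERSION=$1\n"'     # GCC_VERSION=9
```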
(continued)
```bash
# Use the same pre-built XLA test image from PyTorch/XLA
if [[ "$image" == *xla* ]]; then
  echo "Using pre-built XLA test image..."
  exit 0
fi

if [[ "$image" == *-focal* ]]; then
  UBUNTU_VERSION=20.04
elif [[ "$image" == *-jammy* ]]; then
  UBUNTU_VERSION=22.04
elif [[ "$image" == *ubuntu* ]]; then
  extract_version_from_image_name ubuntu UBUNTU_VERSION
elif [[ "$image" == *centos* ]]; then
  extract_version_from_image_name centos CENTOS_VERSION
fi

if [ -n "${UBUNTU_VERSION}" ]; then
  OS="ubuntu"
elif [ -n "${CENTOS_VERSION}" ]; then
  OS="centos"
else
  echo "Unable to derive operating system base..."
  exit 1
fi

DOCKERFILE="${OS}/Dockerfile"
# When using ubuntu - 22.04, start from Ubuntu docker image, instead of nvidia/cuda docker image.
if [[ "$image" == *cuda* && "$UBUNTU_VERSION" != "22.04" ]]; then
  DOCKERFILE="${OS}-cuda/Dockerfile"
elif [[ "$image" == *rocm* ]]; then
  DOCKERFILE="${OS}-rocm/Dockerfile"
elif [[ "$image" == *xpu* ]]; then
  DOCKERFILE="${OS}-xpu/Dockerfile"
elif [[ "$image" == *cuda*linter* ]]; then
  # Use a separate Dockerfile for linter to keep a small image size
  DOCKERFILE="linter-cuda/Dockerfile"
elif [[ "$image" == *linter* ]]; then
  # Use a separate Dockerfile for linter to keep a small image size
  DOCKERFILE="linter/Dockerfile"
fi

# CMake 3.18 is needed to support CUDA17 language variant
CMAKE_VERSION=3.18.5

_UCX_COMMIT=00bcc6bb18fc282eb160623b4c0d300147f579af
_UCC_COMMIT=7cb07a76ccedad7e56ceb136b865eb9319c258ea
```
(continued)
```bash
# It's annoying to rename jobs every time you want to rewrite a
# configuration, so we hardcode everything here rather than do it
# from scratch
case "$image" in
  pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
    CUDA_VERSION=12.1.1
    CUDNN_VERSION=8
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9-inductor-benchmarks)
    CUDA_VERSION=12.1.1
    CUDNN_VERSION=8
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    CONDA_CMAKE=yes
    TRITON=yes
    INDUCTOR_BENCHMARKS=yes
    ;;
  pytorch-linux-focal-cuda11.8-cudnn8-py3-gcc9)
    CUDA_VERSION=11.8.0
    CUDNN_VERSION=8
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-cuda12.1-cudnn8-py3-gcc9)
    CUDA_VERSION=12.1.1
    CUDNN_VERSION=8
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-py3-clang10-onnx)
    ANACONDA_PYTHON_VERSION=3.8
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    ONNX=yes
    ;;
  pytorch-linux-focal-py3-clang9-android-ndk-r21e)
    ANACONDA_PYTHON_VERSION=3.8
    CLANG_VERSION=9
    LLVMDEV=yes
    PROTOBUF=yes
    ANDROID=yes
    ANDROID_NDK_VERSION=r21e
    GRADLE_VERSION=6.8.3
    NINJA_VERSION=1.9.0
    ;;
  pytorch-linux-focal-py3.8-clang10)
    ANACONDA_PYTHON_VERSION=3.8
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
    VISION=yes
    VULKAN_SDK_VERSION=1.2.162.1
    SWIFTSHADER=yes
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-py3.11-clang10)
    ANACONDA_PYTHON_VERSION=3.11
    CLANG_VERSION=10
    PROTOBUF=yes
    DB=yes
    VISION=yes
    VULKAN_SDK_VERSION=1.2.162.1
    SWIFTSHADER=yes
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-py3.8-gcc9)
    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-rocm-n-1-py3)
    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=5.7
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-focal-rocm-n-py3)
    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=6.0
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-xpu-2024.0-py3)
    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    BASEKIT_VERSION=2024.0.0-49522
    NINJA_VERSION=1.9.0
    CONDA_CMAKE=yes
    ;;
  pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks)
    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    CONDA_CMAKE=yes
    TRITON=yes
    DOCS=yes
    INDUCTOR_BENCHMARKS=yes
    ;;
  pytorch-linux-jammy-cuda11.8-cudnn8-py3.8-clang12)
    ANACONDA_PYTHON_VERSION=3.8
    CUDA_VERSION=11.8
    CUDNN_VERSION=8
    CLANG_VERSION=12
    PROTOBUF=yes
    DB=yes
    VISION=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3-clang12-asan)
    ANACONDA_PYTHON_VERSION=3.9
    CLANG_VERSION=12
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3-clang15-asan)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=15
    CONDA_CMAKE=yes
    VISION=yes
    ;;
  pytorch-linux-jammy-py3.8-gcc11)
    ANACONDA_PYTHON_VERSION=3.8
    GCC_VERSION=11
    PROTOBUF=yes
    DB=yes
    VISION=yes
    KATEX=yes
    CONDA_CMAKE=yes
    TRITON=yes
    DOCS=yes
    UNINSTALL_DILL=yes
    ;;
  pytorch-linux-jammy-py3-clang12-executorch)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=12
    CONDA_CMAKE=yes
    EXECUTORCH=yes
    ;;
  pytorch-linux-focal-linter)
    # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
    # We will need to update mypy version eventually, but that's for another day. The task
    # would be to upgrade mypy to 1.0.0 with Python 3.11
    ANACONDA_PYTHON_VERSION=3.9
    CONDA_CMAKE=yes
    ;;
  pytorch-linux-jammy-cuda11.8-cudnn8-py3.9-linter)
    ANACONDA_PYTHON_VERSION=3.9
    CUDA_VERSION=11.8
    CONDA_CMAKE=yes
    ;;
  pytorch-linux-jammy-aarch64-py3.10-gcc11)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    ACL=yes
    PROTOBUF=yes
    DB=yes
    VISION=yes
    CONDA_CMAKE=yes
    ;;
  *)
    # Catch-all for builds that are not hardcoded.
    PROTOBUF=yes
    DB=yes
    VISION=yes
    echo "image '$image' did not match an existing build configuration"
    if [[ "$image" == *py* ]]; then
      extract_version_from_image_name py ANACONDA_PYTHON_VERSION
    fi
    if [[ "$image" == *cuda* ]]; then
      extract_version_from_image_name cuda CUDA_VERSION
      extract_version_from_image_name cudnn CUDNN_VERSION
    fi
    if [[ "$image" == *rocm* ]]; then
      extract_version_from_image_name rocm ROCM_VERSION
      NINJA_VERSION=1.9.0
      TRITON=yes
      # To ensure that any ROCm config will build using conda cmake
      # and thus have LAPACK/MKL enabled
      CONDA_CMAKE=yes
    fi
    if [[ "$image" == *centos7* ]]; then
      NINJA_VERSION=1.10.2
    fi
    if [[ "$image" == *gcc* ]]; then
      extract_version_from_image_name gcc GCC_VERSION
    fi
    if [[ "$image" == *clang* ]]; then
      extract_version_from_image_name clang CLANG_VERSION
    fi
    if [[ "$image" == *devtoolset* ]]; then
      extract_version_from_image_name devtoolset DEVTOOLSET_VERSION
    fi
    if [[ "$image" == *glibc* ]]; then
      extract_version_from_image_name glibc GLIBC_VERSION
    fi
    if [[ "$image" == *cmake* ]]; then
      extract_version_from_image_name cmake CMAKE_VERSION
    fi
    ;;
esac
```
(continued)
```bash
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

#when using cudnn version 8 install it separately from cuda
if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
  IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
  if [[ ${CUDNN_VERSION} == 8 ]]; then
    IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
  fi
fi

# Build image
DOCKER_BUILDKIT=1 docker build \
  --no-cache \
  --progress=plain \
  --build-arg "BUILD_ENVIRONMENT=${image}" \
  --build-arg "PROTOBUF=${PROTOBUF:-}" \
  --build-arg "LLVMDEV=${LLVMDEV:-}" \
  --build-arg "DB=${DB:-}" \
  --build-arg "VISION=${VISION:-}" \
  --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
  --build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
  --build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \
  --build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
  --build-arg "CLANG_VERSION=${CLANG_VERSION}" \
  --build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
  --build-arg "GCC_VERSION=${GCC_VERSION}" \
  --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
  --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
  --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
  --build-arg "ANDROID=${ANDROID}" \
  --build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
  --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
  --build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
  --build-arg "SWIFTSHADER=${SWIFTSHADER}" \
  --build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
  --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
  --build-arg "KATEX=${KATEX:-}" \
  --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
  --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906;gfx90a}" \
  --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
  --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
  --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
  --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
  --build-arg "TRITON=${TRITON}" \
  --build-arg "ONNX=${ONNX}" \
  --build-arg "DOCS=${DOCS}" \
  --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
  --build-arg "EXECUTORCH=${EXECUTORCH}" \
  --build-arg "BASEKIT_VERSION=${BASEKIT_VERSION}" \
  --build-arg "ACL=${ACL:-}" \
  -f $(dirname ${DOCKERFILE})/Dockerfile \
  -t "$tmp_tag" \
  "$@" \
  .

# NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
# find the correct image. As a result, here we have to replace the
#   "$UBUNTU_VERSION" == "18.04-rc"
# with
#   "$UBUNTU_VERSION" == "18.04"
UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//')

function drun() {
  docker run --rm "$tmp_tag" $*
}

if [[ "$OS" == "ubuntu" ]]; then

  if !(drun lsb_release -a 2>&1 | grep -qF Ubuntu); then
    echo "OS=ubuntu, but:"
    drun lsb_release -a
    exit 1
  fi
  if !(drun lsb_release -a 2>&1 | grep -qF "$UBUNTU_VERSION"); then
    echo "UBUNTU_VERSION=$UBUNTU_VERSION, but:"
    drun lsb_release -a
    exit 1
  fi
fi

if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  if !(drun python --version 2>&1 | grep -qF "Python $ANACONDA_PYTHON_VERSION"); then
    echo "ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION, but:"
    drun python --version
    exit 1
  fi
fi

if [ -n "$GCC_VERSION" ]; then
  if !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
    echo "GCC_VERSION=$GCC_VERSION, but:"
    drun gcc --version
    exit 1
  fi
fi

if [ -n "$CLANG_VERSION" ]; then
  if !(drun clang --version 2>&1 | grep -qF "clang version $CLANG_VERSION"); then
    echo "CLANG_VERSION=$CLANG_VERSION, but:"
    drun clang --version
    exit 1
  fi
fi

if [ -n "$KATEX" ]; then
  if !(drun katex --version); then
    echo "KATEX=$KATEX, but:"
    drun katex --version
    exit 1
  fi
fi
```
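The comment block above explains why the `-rc` suffix is stripped from `UBUNTU_VERSION` before the post-build checks. A standalone sketch of that transformation, with the example value from the comment:

```bash
# The sed expression is copied from the script above; the input is the RC-tag example it cites.
UBUNTU_VERSION="18.04-rc"
UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//')
echo "$UBUNTU_VERSION"   # 18.04
```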
@@ -1,123 +0,0 @@
```dockerfile
ARG CENTOS_VERSION

FROM centos:${CENTOS_VERSION}

ARG CENTOS_VERSION

# Set AMD gpu targets to build for
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}

# Install required packages to build Caffe2

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Update CentOS git version
RUN yum -y remove git
RUN yum -y remove git-*
RUN yum -y install https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm || \
    (yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \
    sed -i "s/packages.endpoint/packages.endpointdev/" /etc/yum.repos.d/endpoint.repo)
RUN yum install -y git

# Install devtoolset
ARG DEVTOOLSET_VERSION
COPY ./common/install_devtoolset.sh install_devtoolset.sh
RUN bash ./install_devtoolset.sh && rm install_devtoolset.sh
ENV BASH_ENV "/etc/profile"

# (optional) Install non-default glibc version
ARG GLIBC_VERSION
COPY ./common/install_glibc.sh install_glibc.sh
RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
RUN rm install_glibc.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}

# Install rocm
ARG ROCM_VERSION
COPY ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh
RUN rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh
RUN rm install_rocm_magma.sh
ENV PATH /opt/rocm/bin:$PATH
ENV PATH /opt/rocm/hcc/bin:$PATH
ENV PATH /opt/rocm/hip/bin:$PATH
ENV PATH /opt/rocm/opencl/bin:$PATH
ENV PATH /opt/rocm/llvm/bin:$PATH
ENV MAGMA_HOME /opt/rocm/magma
ENV LANG en_US.utf8
ENV LC_ALL en_US.utf8

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

ARG TRITON
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
ENV CMAKE_C_COMPILER cc
ENV CMAKE_CXX_COMPILER c++
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt
COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

USER jenkins
CMD ["bash"]
```
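This Dockerfile is normally driven by the removed `build.sh` shown earlier, which supplies every `ARG` as a `--build-arg`. A hand-driven sketch of an equivalent invocation, with illustrative values and assuming you run it from the directory that contains the `common/` scripts and `centos-rocm/Dockerfile`:

```bash
# Values are examples only, not one of the hardcoded configurations from build.sh.
docker build \
  --build-arg CENTOS_VERSION=7 \
  --build-arg ROCM_VERSION=5.7 \
  --build-arg ANACONDA_PYTHON_VERSION=3.8 \
  --build-arg BUILD_ENVIRONMENT=pytorch-linux-centos-rocm-example \
  -f centos-rocm/Dockerfile -t pytorch-ci-centos-rocm:example .
```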
```diff
@@ -1 +0,0 @@
-7f96f5a852ba452670255d28d59f1e6398141fbb
@@ -1 +0,0 @@
-243e186efbf7fb93328dd6b34927a4e8c8f24395
@@ -1 +0,0 @@
-730b907b4d45a4713cbc425cbf224c46089fd514
@@ -1 +0,0 @@
-0a22a91d04c2b4a029a69a198eac390089c3e891
@@ -1 +0,0 @@
-989adb9a29496c22a36ef82ca69cad5dad536b9c
```
@@ -1,18 +0,0 @@
```bash
#!/bin/bash

set -ex

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

# Cache the test models at ~/.cache/torch/hub/
IMPORT_SCRIPT_FILENAME="/tmp/torchvision_import_script.py"
as_jenkins echo 'import torchvision; torchvision.models.mobilenet_v2(pretrained=True); torchvision.models.mobilenet_v3_large(pretrained=True);' > "${IMPORT_SCRIPT_FILENAME}"

pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
# Very weird quoting behavior here https://github.com/conda/conda/issues/10972,
# so echo the command to a file and run the file instead
conda_run python "${IMPORT_SCRIPT_FILENAME}"

# Cleaning up
conda_run pip uninstall -y torch torchvision
rm "${IMPORT_SCRIPT_FILENAME}" || true
```
@@ -1,36 +0,0 @@
```bash
#!/bin/bash

# Work around bug where devtoolset replaces sudo and breaks it.
if [ -n "$DEVTOOLSET_VERSION" ]; then
  export SUDO=/bin/sudo
else
  export SUDO=sudo
fi

as_jenkins() {
  # NB: unsetting the environment variables works around a conda bug
  # https://github.com/conda/conda/issues/6576
  # NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
  # NB: This must be run from a directory that jenkins has access to,
  # works around https://github.com/conda/conda-package-handling/pull/34
  $SUDO -E -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
}

conda_install() {
  # Ensure that the install command don't upgrade/downgrade Python
  # This should be called as
  #   conda_install pkg1 pkg2 ... [-c channel]
  as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION" $*
}

conda_run() {
  as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION --no-capture-output $*
}

pip_install() {
  as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*
}

get_pinned_commit() {
  cat "${1}".txt
}
```
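These helpers are the common vocabulary of the other removed install scripts: everything runs as the `jenkins` user inside the `py_$ANACONDA_PYTHON_VERSION` conda env. A short usage sketch (package names and the pin-file name are illustrative):

```bash
# Assumes the helper file above is on disk and ANACONDA_PYTHON_VERSION is set.
source ./common_utils.sh
pip_install numpy                                  # pip install inside the conda env, as jenkins
conda_run python -c 'import sys; print(sys.version)'
PINNED=$(get_pinned_commit triton)                 # reads triton.txt from the current directory
echo "pinned commit: $PINNED"
```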
@@ -1,16 +0,0 @@
```bash
set -euo pipefail

readonly version=v23.08
readonly src_host=https://review.mlplatform.org/ml
readonly src_repo=ComputeLibrary

# Clone ACL
[[ ! -d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git
cd ${src_repo}

git checkout $version

# Build with scons
scons -j8 Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \
  os=linux arch=armv8a build=native multi_isa=1 \
  fixed_format_kernels=1 openmp=1 cppthreads=0
```
@@ -1,160 +0,0 @@
```bash
#!/bin/bash

set -ex

install_ubuntu() {
  # NVIDIA dockers for RC releases use tag names like `11.0-cudnn8-devel-ubuntu18.04-rc`,
  # for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
  # find the correct image. As a result, here we have to check for
  #   "$UBUNTU_VERSION" == "18.04"*
  # instead of
  #   "$UBUNTU_VERSION" == "18.04"
  if [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
    cmake3="cmake=3.16*"
    maybe_libiomp_dev=""
  elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
    cmake3="cmake=3.22*"
    maybe_libiomp_dev=""
  else
    cmake3="cmake=3.5*"
    maybe_libiomp_dev="libiomp-dev"
  fi

  if [[ "$CLANG_VERSION" == 15 ]]; then
    maybe_libomp_dev="libomp-15-dev"
  elif [[ "$CLANG_VERSION" == 12 ]]; then
    maybe_libomp_dev="libomp-12-dev"
  elif [[ "$CLANG_VERSION" == 10 ]]; then
    maybe_libomp_dev="libomp-10-dev"
  else
    maybe_libomp_dev=""
  fi

  # HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
  # See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
  if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
    maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
  else
    maybe_libnccl_dev=""
  fi

  # Install common dependencies
  apt-get update
  # TODO: Some of these may not be necessary
  ccache_deps="asciidoc docbook-xml docbook-xsl xsltproc"
  deploy_deps="libffi-dev libbz2-dev libreadline-dev libncurses5-dev libncursesw5-dev libgdbm-dev libsqlite3-dev uuid-dev tk-dev"
  numpy_deps="gfortran"
  apt-get install -y --no-install-recommends \
    $ccache_deps \
    $numpy_deps \
    ${deploy_deps} \
    ${cmake3} \
    apt-transport-https \
    autoconf \
    automake \
    build-essential \
    ca-certificates \
    curl \
    git \
    libatlas-base-dev \
    libc6-dbg \
    ${maybe_libiomp_dev} \
    libyaml-dev \
    libz-dev \
    libjemalloc2 \
    libjpeg-dev \
    libasound2-dev \
    libsndfile-dev \
    ${maybe_libomp_dev} \
    ${maybe_libnccl_dev} \
    software-properties-common \
    wget \
    sudo \
    vim \
    jq \
    libtool \
    vim \
    unzip \
    gpg-agent \
    gdb

  # Should resolve issues related to various apt package repository cert issues
  # see: https://github.com/pytorch/pytorch/issues/65931
  apt-get install -y libgnutls30

  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

install_centos() {
  # Need EPEL for many packages we depend on.
  # See http://fedoraproject.org/wiki/EPEL
  yum --enablerepo=extras install -y epel-release

  ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
  numpy_deps="gcc-gfortran"
  # Note: protobuf-c-{compiler,devel} on CentOS are too old to be used
  # for Caffe2. That said, we still install them to make sure the build
  # system opts to build/use protoc and libprotobuf from third-party.
  yum install -y \
    $ccache_deps \
    $numpy_deps \
    autoconf \
    automake \
    bzip2 \
    cmake \
    cmake3 \
    curl \
    gcc \
    gcc-c++ \
    gflags-devel \
    git \
    glibc-devel \
    glibc-headers \
    glog-devel \
    hiredis-devel \
    libstdc++-devel \
    libsndfile-devel \
    make \
    opencv-devel \
    sudo \
    wget \
    vim \
    unzip \
    gdb

  # Cleanup
  yum clean all
  rm -rf /var/cache/yum
  rm -rf /var/lib/yum/yumdb
  rm -rf /var/lib/yum/history
}

# Install base packages depending on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  ubuntu)
    install_ubuntu
    ;;
  centos)
    install_centos
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
esac

# Install Valgrind separately since the apt-get version is too old.
mkdir valgrind_build && cd valgrind_build
VALGRIND_VERSION=3.20.0
wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
cd valgrind-${VALGRIND_VERSION}
./configure --prefix=/usr/local
make -j$[$(nproc) - 2]
sudo make install
cd ../../
rm -rf valgrind_build
alias valgrind="/usr/local/bin/valgrind"
```
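The base-package step dispatches on the OS by reading the `ID` field of `/etc/os-release`. The probe can be run on its own to see which branch would be taken (the grep/tr pipeline is copied from the script above):

```bash
# Prints e.g. "ubuntu" or "centos", which selects install_ubuntu or install_centos above.
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
echo "detected base OS: $ID"
```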
@@ -1,44 +0,0 @@
```bash
#!/bin/bash

set -ex

if [ -n "$CLANG_VERSION" ]; then

  if [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
    sudo apt-get update
    # gpg-agent is not available by default on 18.04
    sudo apt-get install -y --no-install-recommends gpg-agent
    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
    apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
  elif [[ $UBUNTU_VERSION == 22.04 ]]; then
    # work around ubuntu apt-get conflicts
    sudo apt-get -y -f install
  fi

  sudo apt-get update
  apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
  apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"

  # Install dev version of LLVM.
  if [ -n "$LLVMDEV" ]; then
    sudo apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"-dev
  fi

  # Use update-alternatives to make this version the default
  update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
  update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-"$CLANG_VERSION" 50
  # Override cc/c++ to clang as well
  update-alternatives --install /usr/bin/cc cc /usr/bin/clang 50
  update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 50

  # clang's packaging is a little messed up (the runtime libs aren't
  # added into the linker path), so give it a little help
  clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
  echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
  ldconfig

  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

fi
```
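After the `update-alternatives` calls above, `cc` and `c++` should resolve to the requested clang. A quick post-install check (a sketch; it only assumes the symlink chain that `update-alternatives` creates):

```bash
# Verify that cc now points at clang rather than gcc.
cc --version | head -n1
readlink -f "$(command -v cc)"   # expected to end in clang-$CLANG_VERSION
```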
@@ -1,31 +0,0 @@
```bash
#!/bin/bash

set -ex

[ -n "$CMAKE_VERSION" ]

# Remove system cmake install so it won't get used instead
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
  ubuntu)
    apt-get remove cmake -y
    ;;
  centos)
    yum remove cmake -y
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
esac

# Turn 3.6.3 into v3.6
path=$(echo "${CMAKE_VERSION}" | sed -e 's/\([0-9].[0-9]\+\).*/v\1/')
file="cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz"

# Download and install specific CMake version in /usr/local
pushd /tmp
curl -Os --retry 3 "https://cmake.org/files/${path}/${file}"
tar -C /usr/local --strip-components 1 --no-same-owner -zxf cmake-*.tar.gz
rm -f cmake-*.tar.gz
popd
```
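The "Turn 3.6.3 into v3.6" sed builds the download-path component from the full version string. With the `CMAKE_VERSION=3.18.5` used by the removed `build.sh`, it behaves like this (the sed expression is copied verbatim from the script above):

```bash
CMAKE_VERSION=3.18.5
path=$(echo "${CMAKE_VERSION}" | sed -e 's/\([0-9].[0-9]\+\).*/v\1/')
echo "$path"   # v3.18 -> used as https://cmake.org/files/v3.18/...
```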
@@ -1,127 +0,0 @@
```bash
#!/bin/bash

set -ex

# Optionally install conda
if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  BASE_URL="https://repo.anaconda.com/miniconda"

  MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
  MINOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 2)

  if [[ $(uname -m) == "aarch64" ]]; then
    BASE_URL="https://github.com/conda-forge/miniforge/releases/latest/download"
    case "$MAJOR_PYTHON_VERSION" in
      3)
        CONDA_FILE="Miniforge3-Linux-aarch64.sh"
        ;;
      *)
        echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
        exit 1
        ;;
    esac
  else
    case "$MAJOR_PYTHON_VERSION" in
      3)
        CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
        ;;
      *)
        echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
        exit 1
        ;;
    esac
  fi

  mkdir -p /opt/conda
  chown jenkins:jenkins /opt/conda

  source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

  pushd /tmp
  wget -q "${BASE_URL}/${CONDA_FILE}"
  # NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
  as_jenkins bash "${CONDA_FILE}" -b -f -p "/opt/conda"
  popd

  # NB: Don't do this, rely on the rpath to get it right
  #echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
  #ldconfig
  sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
  export PATH="/opt/conda/bin:$PATH"

  # Ensure we run conda in a directory that jenkins has write access to
  pushd /opt/conda

  # Prevent conda from updating to 4.14.0, which causes docker build failures
  # See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
  # Uncomment the below when resolved to track the latest conda update
  # as_jenkins conda update -y -n base conda

  if [[ $(uname -m) == "aarch64" ]]; then
    export SYSROOT_DEP="sysroot_linux-aarch64=2.17"
  else
    export SYSROOT_DEP="sysroot_linux-64=2.17"
  fi

  # Install correct Python version
  # Also ensure sysroot is using a modern GLIBC to match system compilers
  as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
    python="$ANACONDA_PYTHON_VERSION" \
    ${SYSROOT_DEP}

  # libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
  # which is provided in libstdcxx 12 and up.
  conda_install libstdcxx-ng=12.3.0 -c conda-forge

  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
  if [[ $(uname -m) == "aarch64" ]]; then
    CONDA_COMMON_DEPS="astunparse pyyaml setuptools openblas==0.3.25=*openmp* ninja==1.11.1 scons==4.5.2"

    if [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
      conda_install numpy=1.24.4 ${CONDA_COMMON_DEPS}
    else
      conda_install numpy=1.26.2 ${CONDA_COMMON_DEPS}
    fi
  else
    CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"

    if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then
      conda_install numpy=1.26.0 ${CONDA_COMMON_DEPS}
    else
      conda_install numpy=1.21.2 ${CONDA_COMMON_DEPS}
    fi
  fi

  # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
  # and libpython-static for torch deploy
  conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}"
```
# Use conda cmake in some cases. Conda cmake will be newer than our supported
|
|
||||||
# min version (3.5 for xenial and 3.10 for bionic), so we only do it in those
|
|
||||||
# following builds that we know should use conda. Specifically, Ubuntu bionic
|
|
||||||
# and focal cannot find conda mkl with stock cmake, so we need a cmake from conda
|
|
||||||
if [ -n "${CONDA_CMAKE}" ]; then
|
|
||||||
conda_install cmake
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Magma package names are concatenation of CUDA major and minor ignoring revision
|
|
||||||
# I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
|
|
||||||
if [ -n "$CUDA_VERSION" ]; then
|
|
||||||
conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Install some other packages, including those needed for Python test reporting
|
|
||||||
pip_install -r /opt/conda/requirements-ci.txt
|
|
||||||
|
|
||||||
pip_install -U scikit-learn
|
|
||||||
|
|
||||||
if [ -n "$DOCS" ]; then
|
|
||||||
apt-get update
|
|
||||||
apt-get -y install expect-dev
|
|
||||||
|
|
||||||
# We are currently building docs with python 3.8 (min support version)
|
|
||||||
pip_install -r /opt/conda/requirements-docs.txt
|
|
||||||
fi
|
|
||||||
|
|
||||||
popd
|
|
||||||
fi
|
|
||||||
@ -1,24 +0,0 @@
#!/bin/bash

if [[ ${CUDNN_VERSION} == 8 ]]; then
  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
  mkdir tmp_cudnn
  pushd tmp_cudnn
  if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
    CUDNN_NAME="cudnn-linux-x86_64-8.9.2.26_cuda12-archive"
    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${CUDNN_NAME}.tar.xz
  elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
    CUDNN_NAME="cudnn-linux-x86_64-8.7.0.84_cuda11-archive"
    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/${CUDNN_NAME}.tar.xz
  else
    echo "Unsupported CUDA version ${CUDA_VERSION}"
    exit 1
  fi

  tar xf ${CUDNN_NAME}.tar.xz
  cp -a ${CUDNN_NAME}/include/* /usr/local/cuda/include/
  cp -a ${CUDNN_NAME}/lib/* /usr/local/cuda/lib64/
  popd
  rm -rf tmp_cudnn
  ldconfig
fi
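# Illustrative check (not part of the original script), assuming the standard
# cuDNN 8 archive layout where the version macros live in cudnn_version.h:
#   grep -A 2 'define CUDNN_MAJOR' /usr/local/cuda/include/cudnn_version.h
#   ldconfig -p | grep libcudnn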
@ -1,21 +0,0 @@
#!/bin/bash

set -ex

# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && cd tmp_cusparselt

if [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
  CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.5.2.1-archive"
  curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
  CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
  curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
fi

tar xf ${CUSPARSELT_NAME}.tar.xz
cp -a ${CUSPARSELT_NAME}/include/* /usr/local/cuda/include/
cp -a ${CUSPARSELT_NAME}/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cusparselt
ldconfig
@ -1,25 +0,0 @@
#!/bin/bash

set -ex

if [ -n "$KATEX" ]; then
  apt-get update
  # Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
  apt-get install -y gpg-agent || :

  curl --retry 3 -sL https://deb.nodesource.com/setup_16.x | sudo -E bash -
  sudo apt-get install -y nodejs

  curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list

  apt-get update
  apt-get install -y --no-install-recommends yarn
  yarn global add katex --prefix /usr/local

  sudo apt-get -y install doxygen

  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

fi
@ -1,61 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
|
||||||
|
|
||||||
clone_executorch() {
|
|
||||||
EXECUTORCH_PINNED_COMMIT=$(get_pinned_commit executorch)
|
|
||||||
|
|
||||||
# Clone the Executorch
|
|
||||||
git clone https://github.com/pytorch/executorch.git
|
|
||||||
|
|
||||||
# and fetch the target commit
|
|
||||||
pushd executorch
|
|
||||||
git checkout "${EXECUTORCH_PINNED_COMMIT}"
|
|
||||||
git submodule update --init
|
|
||||||
popd
|
|
||||||
|
|
||||||
chown -R jenkins executorch
|
|
||||||
}
|
|
||||||
|
|
||||||
install_buck2() {
|
|
||||||
pushd executorch/.ci/docker
|
|
||||||
|
|
||||||
BUCK2_VERSION=$(cat ci_commit_pins/buck2.txt)
|
|
||||||
source common/install_buck.sh
|
|
||||||
|
|
||||||
popd
|
|
||||||
}
|
|
||||||
|
|
||||||
install_conda_dependencies() {
|
|
||||||
pushd executorch/.ci/docker
|
|
||||||
# Install conda dependencies like flatbuffer
|
|
||||||
conda_install --file conda-env-ci.txt
|
|
||||||
popd
|
|
||||||
}
|
|
||||||
|
|
||||||
install_pip_dependencies() {
|
|
||||||
pushd executorch/.ci/docker
|
|
||||||
# Install all Python dependencies
|
|
||||||
pip_install -r requirements-ci.txt
|
|
||||||
popd
|
|
||||||
}
|
|
||||||
|
|
||||||
setup_executorch() {
|
|
||||||
pushd executorch
|
|
||||||
source .ci/scripts/utils.sh
|
|
||||||
|
|
||||||
install_flatc_from_source
|
|
||||||
pip_install .
|
|
||||||
|
|
||||||
# Make sure that all the newly generate files are owned by Jenkins
|
|
||||||
chown -R jenkins .
|
|
||||||
popd
|
|
||||||
}
|
|
||||||
|
|
||||||
clone_executorch
|
|
||||||
install_buck2
|
|
||||||
install_conda_dependencies
|
|
||||||
install_pip_dependencies
|
|
||||||
setup_executorch
|
|
||||||
@ -1,20 +0,0 @@
#!/bin/bash

set -ex

if [ -n "$GCC_VERSION" ]; then

  # Need the official toolchain repo to get alternate packages
  add-apt-repository ppa:ubuntu-toolchain-r/test
  apt-get update
  apt-get install -y g++-$GCC_VERSION
  update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
  update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
  update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50

  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

fi
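# Illustrative example (not part of the original script): each tool is
# registered at priority 50, so in automatic mode the highest-priority
# registered alternative is used. A hypothetical newer registration or a
# manual pin would look like:
#   update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 60   # would win in auto mode
#   update-alternatives --display gcc                                   # list registered alternatives
#   update-alternatives --set gcc /usr/bin/gcc-"$GCC_VERSION"           # switch to manual mode and pin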
@ -1,26 +0,0 @@
#!/bin/bash

set -ex

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

function install_huggingface() {
  local commit
  commit=$(get_pinned_commit huggingface)
  pip_install pandas==2.0.3
  pip_install "git+https://github.com/huggingface/transformers@${commit}"
}

function install_timm() {
  local commit
  commit=$(get_pinned_commit timm)
  pip_install pandas==2.0.3
  pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
  # Clean up
  conda_run pip uninstall -y cmake torch torchvision triton
}

# Pango is needed for weasyprint which is needed for doctr
conda_install pango
install_huggingface
install_timm
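# Note (assumption, not shown in this diff): get_pinned_commit is defined in
# common_utils.sh and resolves a name to the commit hash stored under
# ci_commit_pins/ (the Dockerfiles below copy ci_commit_pins/huggingface.txt
# and ci_commit_pins/timm.txt next to this script), so the installs above
# effectively expand to something like:
#   pip_install "git+https://github.com/huggingface/transformers@$(cat ci_commit_pins/huggingface.txt)"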
@ -1,29 +0,0 @@
#!/bin/bash

set -ex

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

if [ -n "${UBUNTU_VERSION}" ]; then
  apt update
  apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
fi

# Do shallow clone of PyTorch so that we can init lintrunner in Docker build context
git clone https://github.com/pytorch/pytorch.git --depth 1
chown -R jenkins pytorch

pushd pytorch
# Install all linter dependencies
pip_install -r requirements.txt
conda_run lintrunner init

# Cache .lintbin directory as part of the Docker image
cp -r .lintbin /tmp
popd

# Node dependencies required by toc linter job
npm install -g markdown-toc

# Cleaning up
rm -rf pytorch
@ -1,51 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
|
||||||
|
|
||||||
retry () {
|
|
||||||
"$@" || (sleep 10 && "$@") || (sleep 20 && "$@") || (sleep 40 && "$@")
|
|
||||||
}
|
|
||||||
|
|
||||||
# A bunch of custom pip dependencies for ONNX
|
|
||||||
pip_install \
|
|
||||||
beartype==0.15.0 \
|
|
||||||
filelock==3.9.0 \
|
|
||||||
flatbuffers==2.0 \
|
|
||||||
mock==5.0.1 \
|
|
||||||
ninja==1.10.2 \
|
|
||||||
networkx==2.0 \
|
|
||||||
numpy==1.24.2
|
|
||||||
|
|
||||||
# ONNXRuntime should be installed before installing
|
|
||||||
# onnx-weekly. Otherwise, onnx-weekly could be
|
|
||||||
# overwritten by onnx.
|
|
||||||
pip_install \
|
|
||||||
parameterized==0.8.1 \
|
|
||||||
pytest-cov==4.0.0 \
|
|
||||||
pytest-subtests==0.10.0 \
|
|
||||||
tabulate==0.9.0 \
|
|
||||||
transformers==4.36.2
|
|
||||||
|
|
||||||
pip_install coloredlogs packaging
|
|
||||||
|
|
||||||
pip_install onnxruntime==1.17.0
|
|
||||||
pip_install onnx==1.15.0
|
|
||||||
# pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@3e869ef8ccf19b5ebd21c10d3e9c267c9a9fa729" --no-deps
|
|
||||||
pip_install onnxscript==0.1.0.dev20240315 --no-deps
|
|
||||||
|
|
||||||
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
|
|
||||||
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
|
|
||||||
IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
|
|
||||||
as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
|
|
||||||
|
|
||||||
# Need a PyTorch version for transformers to work
|
|
||||||
pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
|
|
||||||
# Very weird quoting behavior here https://github.com/conda/conda/issues/10972,
|
|
||||||
# so echo the command to a file and run the file instead
|
|
||||||
conda_run python "${IMPORT_SCRIPT_FILENAME}"
|
|
||||||
|
|
||||||
# Cleaning up
|
|
||||||
conda_run pip uninstall -y torch
|
|
||||||
rm "${IMPORT_SCRIPT_FILENAME}" || true
|
|
||||||
@ -1,17 +0,0 @@
#!/bin/bash

set -ex

OPENSSL=openssl-1.1.1k

wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
tar xf "${OPENSSL}.tar.gz"
cd "${OPENSSL}"
./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
# NOTE: openssl install errors out when built with the -j option
NPROC=$[$(nproc) - 2]
make -j${NPROC}; make install_sw
# Link the ssl libraries to the /usr/lib folder.
sudo ln -s /opt/openssl/lib/lib* /usr/lib
cd ..
rm -rf "${OPENSSL}"
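# Illustrative check (not part of the original script): make install_sw puts
# the binaries under the configured prefix, so the custom build can be
# verified with:
#   /opt/openssl/bin/openssl version   # expected to report OpenSSL 1.1.1k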
@ -1,19 +0,0 @@
#!/bin/bash

set -ex

pb_dir="/usr/temp_pb_install_dir"
mkdir -p $pb_dir

# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
# else it will fail with
# g++: error: ./../lib64/crti.o: No such file or directory
ln -s /usr/lib64 "$pb_dir/lib64"

curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3

tar -xvz --no-same-owner -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
NPROC=$[$(nproc) - 2]
pushd "$pb_dir" && ./configure && make -j${NPROC} && make -j${NPROC} check && sudo make -j${NPROC} install && sudo ldconfig
popd
rm -rf $pb_dir
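# Illustrative check (not part of the original script): after the install and
# ldconfig above, the toolchain can be sanity-checked with:
#   protoc --version                 # expected to print libprotoc 3.17.3
#   ldconfig -p | grep libprotobuf   # confirm the shared library is registered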
@ -1,190 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
ver() {
|
|
||||||
printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
|
|
||||||
}
|
|
||||||
|
|
||||||
# Map ROCm version to AMDGPU version
|
|
||||||
declare -A AMDGPU_VERSIONS=( ["5.0"]="21.50" ["5.1.1"]="22.10.1" ["5.2"]="22.20" )
|
|
||||||
|
|
||||||
install_ubuntu() {
|
|
||||||
apt-get update
|
|
||||||
if [[ $UBUNTU_VERSION == 18.04 ]]; then
|
|
||||||
# gpg-agent is not available by default on 18.04
|
|
||||||
apt-get install -y --no-install-recommends gpg-agent
|
|
||||||
fi
|
|
||||||
if [[ $UBUNTU_VERSION == 20.04 ]]; then
|
|
||||||
# gpg-agent is not available by default on 20.04
|
|
||||||
apt-get install -y --no-install-recommends gpg-agent
|
|
||||||
fi
|
|
||||||
apt-get install -y kmod
|
|
||||||
apt-get install -y wget
|
|
||||||
|
|
||||||
# Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime
|
|
||||||
apt-get install -y libc++1
|
|
||||||
apt-get install -y libc++abi1
|
|
||||||
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
|
|
||||||
# Add amdgpu repository
|
|
||||||
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
|
|
||||||
local amdgpu_baseurl
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
|
|
||||||
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
|
|
||||||
else
|
|
||||||
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
|
|
||||||
fi
|
|
||||||
echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
|
|
||||||
fi
|
|
||||||
|
|
||||||
ROCM_REPO="ubuntu"
|
|
||||||
if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
|
|
||||||
ROCM_REPO="xenial"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
|
|
||||||
ROCM_REPO="${UBUNTU_VERSION_NAME}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Add rocm repository
|
|
||||||
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
|
|
||||||
local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
|
|
||||||
echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
|
|
||||||
apt-get update --allow-insecure-repositories
|
|
||||||
|
|
||||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
|
|
||||||
rocm-dev \
|
|
||||||
rocm-utils \
|
|
||||||
rocm-libs \
|
|
||||||
rccl \
|
|
||||||
rocprofiler-dev \
|
|
||||||
roctracer-dev
|
|
||||||
|
|
||||||
# precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
|
|
||||||
# search for all unversioned packages
|
|
||||||
# if search fails it will abort this script; use true to avoid case where search fails
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.5) ]]; then
|
|
||||||
MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
|
|
||||||
if [[ "x${MIOPENHIPGFX}" = x ]]; then
|
|
||||||
echo "miopen-hip-gfx package not available" && exit 1
|
|
||||||
else
|
|
||||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
MIOPENKERNELS=$(apt-cache search --names-only miopenkernels | awk '{print $1}' | grep -F -v . || true)
|
|
||||||
if [[ "x${MIOPENKERNELS}" = x ]]; then
|
|
||||||
echo "miopenkernels package not available" && exit 1
|
|
||||||
else
|
|
||||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENKERNELS}
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
|
|
||||||
for kdb in /opt/rocm/share/miopen/db/*.kdb
|
|
||||||
do
|
|
||||||
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
apt-get autoclean && apt-get clean
|
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
|
||||||
}
|
|
||||||
|
|
||||||
install_centos() {
|
|
||||||
|
|
||||||
yum update -y
|
|
||||||
yum install -y kmod
|
|
||||||
yum install -y wget
|
|
||||||
yum install -y openblas-devel
|
|
||||||
|
|
||||||
yum install -y epel-release
|
|
||||||
yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`
|
|
||||||
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
|
|
||||||
# Add amdgpu repository
|
|
||||||
local amdgpu_baseurl
|
|
||||||
if [[ $OS_VERSION == 9 ]]; then
|
|
||||||
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/9.0/main/x86_64"
|
|
||||||
else
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then
|
|
||||||
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
|
|
||||||
else
|
|
||||||
amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
|
|
||||||
echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
|
|
||||||
echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
|
|
||||||
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
|
|
||||||
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
|
|
||||||
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
|
|
||||||
fi
|
|
||||||
|
|
||||||
local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
|
|
||||||
echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
|
|
||||||
echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
|
|
||||||
echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
|
|
||||||
echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
|
|
||||||
echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
|
|
||||||
echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo
|
|
||||||
|
|
||||||
yum update -y
|
|
||||||
|
|
||||||
yum install -y \
|
|
||||||
rocm-dev \
|
|
||||||
rocm-utils \
|
|
||||||
rocm-libs \
|
|
||||||
rccl \
|
|
||||||
rocprofiler-dev \
|
|
||||||
roctracer-dev
|
|
||||||
|
|
||||||
# precompiled miopen kernels; search for all unversioned packages
|
|
||||||
# if search fails it will abort this script; use true to avoid case where search fails
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 5.5) ]]; then
|
|
||||||
MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
|
|
||||||
if [[ "x${MIOPENHIPGFX}" = x ]]; then
|
|
||||||
echo "miopen-hip-gfx package not available" && exit 1
|
|
||||||
else
|
|
||||||
yum install -y ${MIOPENHIPGFX}
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
MIOPENKERNELS=$(yum -q search miopenkernels | grep miopenkernels- | awk '{print $1}'| grep -F kdb. || true)
|
|
||||||
if [[ "x${MIOPENKERNELS}" = x ]]; then
|
|
||||||
echo "miopenkernels package not available" && exit 1
|
|
||||||
else
|
|
||||||
yum install -y ${MIOPENKERNELS}
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
|
|
||||||
if [[ $(ver $ROCM_VERSION) -ge $(ver 6.0) ]]; then
|
|
||||||
for kdb in /opt/rocm/share/miopen/db/*.kdb
|
|
||||||
do
|
|
||||||
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
yum clean all
|
|
||||||
rm -rf /var/cache/yum
|
|
||||||
rm -rf /var/lib/yum/yumdb
|
|
||||||
rm -rf /var/lib/yum/history
|
|
||||||
}
|
|
||||||
|
|
||||||
# Install Python packages depending on the base OS
|
|
||||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
|
||||||
case "$ID" in
|
|
||||||
ubuntu)
|
|
||||||
install_ubuntu
|
|
||||||
;;
|
|
||||||
centos)
|
|
||||||
install_centos
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Unable to determine OS..."
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
# "install" hipMAGMA into /opt/rocm/magma by copying after build
|
|
||||||
git clone https://bitbucket.org/icl/magma.git
|
|
||||||
pushd magma
|
|
||||||
|
|
||||||
# Version 2.7.2 + ROCm related updates
|
|
||||||
git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6
|
|
||||||
|
|
||||||
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
|
|
||||||
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
|
|
||||||
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
|
|
||||||
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
|
|
||||||
export PATH="${PATH}:/opt/rocm/bin"
|
|
||||||
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
|
|
||||||
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
|
|
||||||
else
|
|
||||||
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
|
|
||||||
fi
|
|
||||||
for arch in $amdgpu_targets; do
|
|
||||||
echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
|
|
||||||
done
|
|
||||||
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
|
|
||||||
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
|
|
||||||
make -f make.gen.hipMAGMA -j $(nproc)
|
|
||||||
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION
|
|
||||||
make testing/testing_dgemm -j $(nproc) MKLROOT=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION
|
|
||||||
popd
|
|
||||||
mv magma /opt/rocm
|
|
||||||
@ -1,69 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
|
||||||
|
|
||||||
get_conda_version() {
|
|
||||||
as_jenkins conda list -n py_$ANACONDA_PYTHON_VERSION | grep -w $* | head -n 1 | awk '{print $2}'
|
|
||||||
}
|
|
||||||
|
|
||||||
conda_reinstall() {
|
|
||||||
as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y --force-reinstall $*
|
|
||||||
}
|
|
||||||
|
|
||||||
if [ -n "${ROCM_VERSION}" ]; then
|
|
||||||
TRITON_REPO="https://github.com/ROCmSoftwarePlatform/triton"
|
|
||||||
TRITON_TEXT_FILE="triton-rocm"
|
|
||||||
else
|
|
||||||
TRITON_REPO="https://github.com/openai/triton"
|
|
||||||
TRITON_TEXT_FILE="triton"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# The logic here is copied from .ci/pytorch/common_utils.sh
|
|
||||||
TRITON_PINNED_COMMIT=$(get_pinned_commit ${TRITON_TEXT_FILE})
|
|
||||||
|
|
||||||
if [ -n "${UBUNTU_VERSION}" ];then
|
|
||||||
apt update
|
|
||||||
apt-get install -y gpg-agent
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "${CONDA_CMAKE}" ]; then
|
|
||||||
# Keep the current cmake and numpy version here, so we can reinstall them later
|
|
||||||
CMAKE_VERSION=$(get_conda_version cmake)
|
|
||||||
NUMPY_VERSION=$(get_conda_version numpy)
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -z "${MAX_JOBS}" ]; then
|
|
||||||
export MAX_JOBS=$(nproc)
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" == "7" ]]; then
|
|
||||||
# Triton needs at least gcc-9 to build
|
|
||||||
apt-get install -y g++-9
|
|
||||||
|
|
||||||
CXX=g++-9 pip_install "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python"
|
|
||||||
elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
|
|
||||||
# Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
|
|
||||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
|
||||||
apt-get install -y g++-9
|
|
||||||
|
|
||||||
CXX=g++-9 pip_install "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python"
|
|
||||||
else
|
|
||||||
pip_install "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n "${CONDA_CMAKE}" ]; then
|
|
||||||
# TODO: This is to make sure that the same cmake and numpy version from install conda
|
|
||||||
# script is used. Without this step, the newer cmake version (3.25.2) downloaded by
|
|
||||||
# triton build step via pip will fail to detect conda MKL. Once that issue is fixed,
|
|
||||||
# this can be removed.
|
|
||||||
#
|
|
||||||
# The correct numpy version also needs to be set here because conda claims that it
|
|
||||||
# causes inconsistent environment. Without this, conda will attempt to install the
|
|
||||||
# latest numpy version, which fails ASAN tests with the following import error: Numba
|
|
||||||
# needs NumPy 1.20 or less.
|
|
||||||
conda_reinstall cmake="${CMAKE_VERSION}"
|
|
||||||
# Note that we install numpy with pip as conda might not have the version we want
|
|
||||||
pip_install --force-reinstall numpy=="${NUMPY_VERSION}"
|
|
||||||
fi
|
|
||||||
@ -1,53 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
if [[ -d "/usr/local/cuda/" ]]; then
|
|
||||||
with_cuda=/usr/local/cuda/
|
|
||||||
else
|
|
||||||
with_cuda=no
|
|
||||||
fi
|
|
||||||
|
|
||||||
function install_ucx() {
|
|
||||||
set -ex
|
|
||||||
git clone --recursive https://github.com/openucx/ucx.git
|
|
||||||
pushd ucx
|
|
||||||
git checkout ${UCX_COMMIT}
|
|
||||||
git submodule update --init --recursive
|
|
||||||
|
|
||||||
./autogen.sh
|
|
||||||
./configure --prefix=$UCX_HOME \
|
|
||||||
--enable-mt \
|
|
||||||
--with-cuda=$with_cuda \
|
|
||||||
--enable-profiling \
|
|
||||||
--enable-stats
|
|
||||||
time make -j
|
|
||||||
sudo make install
|
|
||||||
|
|
||||||
popd
|
|
||||||
rm -rf ucx
|
|
||||||
}
|
|
||||||
|
|
||||||
function install_ucc() {
|
|
||||||
set -ex
|
|
||||||
git clone --recursive https://github.com/openucx/ucc.git
|
|
||||||
pushd ucc
|
|
||||||
git checkout ${UCC_COMMIT}
|
|
||||||
git submodule update --init --recursive
|
|
||||||
|
|
||||||
./autogen.sh
|
|
||||||
# We only run distributed tests on Tesla M60 and A10G
|
|
||||||
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
|
|
||||||
./configure --prefix=$UCC_HOME \
|
|
||||||
--with-ucx=$UCX_HOME \
|
|
||||||
--with-cuda=$with_cuda \
|
|
||||||
--with-nvcc-gencode="${NVCC_GENCODE}"
|
|
||||||
time make -j
|
|
||||||
sudo make install
|
|
||||||
|
|
||||||
popd
|
|
||||||
rm -rf ucc
|
|
||||||
}
|
|
||||||
|
|
||||||
install_ucx
|
|
||||||
install_ucc
|
|
||||||
@ -1,115 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -xe
|
|
||||||
|
|
||||||
|
|
||||||
# Intel® software for general purpose GPU capabilities.
|
|
||||||
# Refer to https://dgpu-docs.intel.com/releases/stable_647_21_20230714.html
|
|
||||||
|
|
||||||
# Intel® oneAPI Base Toolkit (version 2024.0.0) has been updated to include functional and security updates.
|
|
||||||
# Refer to https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html
|
|
||||||
|
|
||||||
# Users should update to the latest version as it becomes available
|
|
||||||
|
|
||||||
function install_ubuntu() {
|
|
||||||
apt-get update -y
|
|
||||||
apt-get install -y gpg-agent wget
|
|
||||||
|
|
||||||
# Set up the repository. To do this, download the key to the system keyring
|
|
||||||
wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
|
|
||||||
| gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg
|
|
||||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
|
|
||||||
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
|
||||||
|
|
||||||
# Add the signed entry to APT sources and configure the APT client to use the Intel repository
|
|
||||||
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/production/2328 unified" \
|
|
||||||
| tee /etc/apt/sources.list.d/intel-gpu-jammy.list
|
|
||||||
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
|
|
||||||
| tee /etc/apt/sources.list.d/oneAPI.list
|
|
||||||
|
|
||||||
# Update the packages list and repository index
|
|
||||||
apt-get update
|
|
||||||
|
|
||||||
# The xpu-smi packages
|
|
||||||
apt-get install -y flex bison xpu-smi
|
|
||||||
# Compute and Media Runtimes
|
|
||||||
apt-get install -y \
|
|
||||||
intel-opencl-icd intel-level-zero-gpu level-zero \
|
|
||||||
intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
|
|
||||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
|
||||||
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
|
||||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
|
|
||||||
# Development Packages
|
|
||||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
|
||||||
# Install Intel® oneAPI Base Toolkit
|
|
||||||
if [ -n "$BASEKIT_VERSION" ]; then
|
|
||||||
apt-get install intel-basekit=$BASEKIT_VERSION -y
|
|
||||||
else
|
|
||||||
apt-get install intel-basekit -y
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
apt-get autoclean && apt-get clean
|
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
|
||||||
}
|
|
||||||
|
|
||||||
function install_centos() {
|
|
||||||
dnf install -y 'dnf-command(config-manager)'
|
|
||||||
dnf config-manager --add-repo \
|
|
||||||
https://repositories.intel.com/gpu/rhel/8.6/production/2328/unified/intel-gpu-8.6.repo
|
|
||||||
# To add the EPEL repository needed for DKMS
|
|
||||||
dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
|
|
||||||
# https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
|
|
||||||
|
|
||||||
# Create the YUM repository file in the /temp directory as a normal user
|
|
||||||
tee > /tmp/oneAPI.repo << EOF
|
|
||||||
[oneAPI]
|
|
||||||
name=Intel® oneAPI repository
|
|
||||||
baseurl=https://yum.repos.intel.com/oneapi
|
|
||||||
enabled=1
|
|
||||||
gpgcheck=1
|
|
||||||
repo_gpgcheck=1
|
|
||||||
gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# Move the newly created oneAPI.repo file to the YUM configuration directory /etc/yum.repos.d
|
|
||||||
mv /tmp/oneAPI.repo /etc/yum.repos.d
|
|
||||||
|
|
||||||
# The xpu-smi packages
|
|
||||||
dnf install -y flex bison xpu-smi
|
|
||||||
# Compute and Media Runtimes
|
|
||||||
dnf install -y \
|
|
||||||
intel-opencl intel-media intel-mediasdk libmfxgen1 libvpl2\
|
|
||||||
level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
|
|
||||||
mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
|
|
||||||
mesa-libxatracker libvpl-tools intel-metrics-discovery \
|
|
||||||
intel-metrics-library intel-igc-core intel-igc-cm \
|
|
||||||
libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc hwinfo clinfo
|
|
||||||
# Development packages
|
|
||||||
dnf install -y --refresh \
|
|
||||||
intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
|
|
||||||
level-zero-devel
|
|
||||||
# Install Intel® oneAPI Base Toolkit
|
|
||||||
dnf install intel-basekit -y
|
|
||||||
|
|
||||||
# Cleanup
|
|
||||||
dnf clean all
|
|
||||||
rm -rf /var/cache/yum
|
|
||||||
rm -rf /var/lib/yum/yumdb
|
|
||||||
rm -rf /var/lib/yum/history
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# The installation depends on the base OS
|
|
||||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
|
||||||
case "$ID" in
|
|
||||||
ubuntu)
|
|
||||||
install_ubuntu
|
|
||||||
;;
|
|
||||||
centos)
|
|
||||||
install_centos
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Unable to determine OS..."
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
ARG UBUNTU_VERSION
|
|
||||||
|
|
||||||
FROM ubuntu:${UBUNTU_VERSION}
|
|
||||||
|
|
||||||
ARG UBUNTU_VERSION
|
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
|
||||||
|
|
||||||
# Install common dependencies (so that this step can be cached separately)
|
|
||||||
COPY ./common/install_base.sh install_base.sh
|
|
||||||
RUN bash ./install_base.sh && rm install_base.sh
|
|
||||||
|
|
||||||
# Install missing libomp-dev
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends libomp-dev && apt-get autoclean && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
|
||||||
|
|
||||||
# Install user
|
|
||||||
COPY ./common/install_user.sh install_user.sh
|
|
||||||
RUN bash ./install_user.sh && rm install_user.sh
|
|
||||||
|
|
||||||
# Install conda and other packages (e.g., numpy, pytest)
|
|
||||||
ARG ANACONDA_PYTHON_VERSION
|
|
||||||
ARG CONDA_CMAKE
|
|
||||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
|
||||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
|
||||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
|
||||||
COPY ./common/install_conda.sh install_conda.sh
|
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
|
||||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
|
||||||
|
|
||||||
# Install cuda and cudnn
|
|
||||||
ARG CUDA_VERSION
|
|
||||||
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
|
|
||||||
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
|
|
||||||
ENV DESIRED_CUDA ${CUDA_VERSION}
|
|
||||||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
|
|
||||||
|
|
||||||
# Note that Docker build forbids copying file outside the build context
|
|
||||||
COPY ./common/install_linter.sh install_linter.sh
|
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
|
||||||
RUN bash ./install_linter.sh
|
|
||||||
RUN rm install_linter.sh common_utils.sh
|
|
||||||
|
|
||||||
USER jenkins
|
|
||||||
CMD ["bash"]
|
|
||||||
@ -1,34 +0,0 @@
|
|||||||
ARG UBUNTU_VERSION
|
|
||||||
|
|
||||||
FROM ubuntu:${UBUNTU_VERSION}
|
|
||||||
|
|
||||||
ARG UBUNTU_VERSION
|
|
||||||
|
|
||||||
ENV DEBIAN_FRONTEND noninteractive
|
|
||||||
|
|
||||||
# Install common dependencies (so that this step can be cached separately)
|
|
||||||
COPY ./common/install_base.sh install_base.sh
|
|
||||||
RUN bash ./install_base.sh && rm install_base.sh
|
|
||||||
|
|
||||||
# Install user
|
|
||||||
COPY ./common/install_user.sh install_user.sh
|
|
||||||
RUN bash ./install_user.sh && rm install_user.sh
|
|
||||||
|
|
||||||
# Install conda and other packages (e.g., numpy, pytest)
|
|
||||||
ARG ANACONDA_PYTHON_VERSION
|
|
||||||
ARG CONDA_CMAKE
|
|
||||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
|
||||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
|
||||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
|
||||||
COPY ./common/install_conda.sh install_conda.sh
|
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
|
||||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
|
||||||
|
|
||||||
# Note that Docker build forbids copying file outside the build context
|
|
||||||
COPY ./common/install_linter.sh install_linter.sh
|
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
|
||||||
RUN bash ./install_linter.sh
|
|
||||||
RUN rm install_linter.sh common_utils.sh
|
|
||||||
|
|
||||||
USER jenkins
|
|
||||||
CMD ["bash"]
|
|
||||||
@ -1,313 +0,0 @@
|
|||||||
# Python dependencies required for unit tests
|
|
||||||
|
|
||||||
#awscli==1.6 #this breaks some platforms
|
|
||||||
#Description: AWS command line interface
|
|
||||||
#Pinned versions: 1.6
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
boto3==1.19.12
|
|
||||||
#Description: AWS SDK for python
|
|
||||||
#Pinned versions: 1.19.12, 1.16.34
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
click
|
|
||||||
#Description: Command Line Interface Creation Kit
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
coremltools==5.0b5 ; python_version < "3.12"
|
|
||||||
#Description: Apple framework for ML integration
|
|
||||||
#Pinned versions: 5.0b5
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
#dataclasses #this breaks some platforms
|
|
||||||
#Description: Provides decorators for auto adding special methods to user classes
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
dill==0.3.7
|
|
||||||
#Description: dill extends pickle with serializing and de-serializing for most built-ins
|
|
||||||
#Pinned versions: 0.3.7
|
|
||||||
#test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py
|
|
||||||
|
|
||||||
expecttest==0.1.6
|
|
||||||
#Description: method for writing tests where test framework auto populates
|
|
||||||
# the expected output based on previous runs
|
|
||||||
#Pinned versions: 0.1.6
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
flatbuffers==2.0
|
|
||||||
#Description: cross platform serialization library
|
|
||||||
#Pinned versions: 2.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
hypothesis==5.35.1
|
|
||||||
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
|
|
||||||
#Description: advanced library for generating parametrized tests
|
|
||||||
#Pinned versions: 3.44.6, 4.53.2
|
|
||||||
#test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py
|
|
||||||
|
|
||||||
junitparser==2.1.1
|
|
||||||
#Description: unitparser handles JUnit/xUnit Result XML files
|
|
||||||
#Pinned versions: 2.1.1
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
lark==0.12.0
|
|
||||||
#Description: parser
|
|
||||||
#Pinned versions: 0.12.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
librosa>=0.6.2 ; python_version < "3.11"
|
|
||||||
#Description: A python package for music and audio analysis
|
|
||||||
#Pinned versions: >=0.6.2
|
|
||||||
#test that import: test_spectral_ops.py
|
|
||||||
|
|
||||||
#mkl #this breaks linux-bionic-rocm4.5-py3.7
|
|
||||||
#Description: Intel oneAPI Math Kernel Library
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import: test_profiler.py, test_public_bindings.py, test_testing.py,
|
|
||||||
#test_nn.py, test_mkldnn.py, test_jit.py, test_fx_experimental.py,
|
|
||||||
#test_autograd.py
|
|
||||||
|
|
||||||
#mkl-devel
|
|
||||||
# see mkl
|
|
||||||
|
|
||||||
#mock
|
|
||||||
#Description: A testing library that allows you to replace parts of your
|
|
||||||
#system under test with mock objects
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import: test_modules.py, test_nn.py,
|
|
||||||
#test_testing.py
|
|
||||||
|
|
||||||
#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8
|
|
||||||
#Description: collects runtime types of function arguments and return
|
|
||||||
#values, and can automatically generate stub files
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
mypy==1.8.0
|
|
||||||
# Pin MyPy version because new errors are likely to appear with each release
|
|
||||||
#Description: linter
|
|
||||||
#Pinned versions: 1.8.0
|
|
||||||
#test that import: test_typing.py, test_type_hints.py
|
|
||||||
|
|
||||||
networkx==2.8.8
|
|
||||||
#Description: creation, manipulation, and study of
|
|
||||||
#the structure, dynamics, and functions of complex networks
|
|
||||||
#Pinned versions: 2.8.8
|
|
||||||
#test that import: functorch
|
|
||||||
|
|
||||||
#ninja
|
|
||||||
#Description: build system. Note that it install from
|
|
||||||
#here breaks things so it is commented out
|
|
||||||
#Pinned versions: 1.10.0.post1
|
|
||||||
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
|
||||||
|
|
||||||
numba==0.49.0 ; python_version < "3.9"
|
|
||||||
numba==0.54.1 ; python_version == "3.9"
|
|
||||||
numba==0.55.2 ; python_version == "3.10"
|
|
||||||
#Description: Just-In-Time Compiler for Numerical Functions
|
|
||||||
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
|
|
||||||
#test that import: test_numba_integration.py
|
|
||||||
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
|
|
||||||
|
|
||||||
#numpy
|
|
||||||
#Description: Provides N-dimensional arrays and linear algebra
|
|
||||||
#Pinned versions: 1.20
|
|
||||||
#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
|
|
||||||
#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
|
|
||||||
#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
|
|
||||||
#test_spectral_ops.py, test_sort_and_select.py, test_shape_ops.py,
|
|
||||||
#test_segment_reductions.py, test_reductions.py, test_pruning_op.py,
|
|
||||||
#test_overrides.py, test_numpy_interop.py, test_numba_integration.py
|
|
||||||
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
|
|
||||||
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
|
|
||||||
#test_binary_ufuncs.py
|
|
||||||
|
|
||||||
#onnxruntime
|
|
||||||
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
|
|
||||||
#Pinned versions: 1.9.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
opt-einsum==3.3
|
|
||||||
#Description: Python library to optimize tensor contraction order, used in einsum
|
|
||||||
#Pinned versions: 3.3
|
|
||||||
#test that import: test_linalg.py
|
|
||||||
|
|
||||||
optree==0.9.1
|
|
||||||
#Description: A library for tree manipulation
|
|
||||||
#Pinned versions: 0.9.1
|
|
||||||
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
|
|
||||||
#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
|
|
||||||
#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
|
|
||||||
#test_expanded_weights.py, test_decomp.py, test_overrides.py, test_masked.py,
|
|
||||||
#test_ops.py, test_prims.py, test_subclass.py, test_functionalization.py,
|
|
||||||
#test_schema_check.py, test_profiler_tree.py, test_meta.py, test_torchxla_num_output.py,
|
|
||||||
#test_utils.py, test_proxy_tensor.py, test_memory_profiler.py, test_view_ops.py,
|
|
||||||
#test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
|
|
||||||
#test_fake_tensor.py, test_mps.py
|
|
||||||
|
|
||||||
pillow==10.2.0
|
|
||||||
#Description: Python Imaging Library fork
|
|
||||||
#Pinned versions: 10.2.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
protobuf==3.20.2
|
|
||||||
#Description: Google’s data interchange format
|
|
||||||
#Pinned versions: 3.20.2
|
|
||||||
#test that import: test_tensorboard.py
|
|
||||||
|
|
||||||
psutil
|
|
||||||
#Description: information on running processes and system utilization
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import: test_profiler.py, test_openmp.py, test_dataloader.py
|
|
||||||
|
|
||||||
pytest==7.3.2
|
|
||||||
#Description: testing framework
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import: test_typing.py, test_cpp_extensions_aot.py, run_test.py
|
|
||||||
|
|
||||||
pytest-xdist==3.3.1
|
|
||||||
#Description: plugin for running pytest in parallel
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
pytest-flakefinder==1.1.0
|
|
||||||
#Description: plugin for rerunning tests a fixed number of times in pytest
|
|
||||||
#Pinned versions: 1.1.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
pytest-rerunfailures>=10.3
|
|
||||||
#Description: plugin for rerunning failure tests in pytest
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
#pytest-benchmark
|
|
||||||
#Description: fixture for benchmarking code
|
|
||||||
#Pinned versions: 3.2.3
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
#pytest-sugar
|
|
||||||
#Description: shows failures and errors instantly
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
xdoctest==1.1.0
|
|
||||||
#Description: runs doctests in pytest
|
|
||||||
#Pinned versions: 1.1.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
pygments==2.15.0
|
|
||||||
#Description: support doctest highlighting
|
|
||||||
#Pinned versions: 2.15.0
|
|
||||||
#test that import: the doctests
|
|
||||||
|
|
||||||
#PyYAML
|
|
||||||
#Description: data serialization format
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
#requests
|
|
||||||
#Description: HTTP library
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import: test_type_promotion.py
|
|
||||||
|
|
||||||
#rich
|
|
||||||
#Description: rich text and beautiful formatting in the terminal
|
|
||||||
#Pinned versions: 10.9.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
scikit-image==0.19.3 ; python_version < "3.10"
|
|
||||||
scikit-image==0.20.0 ; python_version >= "3.10"
|
|
||||||
#Description: image processing routines
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import: test_nn.py
|
|
||||||
|
|
||||||
#scikit-learn
|
|
||||||
#Description: machine learning package
|
|
||||||
#Pinned versions: 0.20.3
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
scipy==1.6.3 ; python_version < "3.10"
|
|
||||||
scipy==1.8.1 ; python_version == "3.10"
|
|
||||||
scipy==1.10.1 ; python_version == "3.11"
|
|
||||||
# Pin SciPy because of failing distribution tests (see #60347)
|
|
||||||
#Description: scientific python
|
|
||||||
#Pinned versions: 1.6.3
|
|
||||||
#test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
|
|
||||||
#test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
|
|
||||||
#test_linalg.py, test_binary_ufuncs.py
|
|
||||||
|
|
||||||
#tabulate
|
|
||||||
#Description: Pretty-print tabular data
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
tb-nightly==2.13.0a20230426
|
|
||||||
#Description: TensorBoard
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
# needed by torchgen utils
|
|
||||||
typing-extensions
|
|
||||||
#Description: type hints for python
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
#virtualenv
|
|
||||||
#Description: virtual environment for python
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
unittest-xml-reporting<=3.2.0,>=2.0.0
|
|
||||||
#Description: saves unit test results to xml
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
#wheel not found on aarch64, and source build requires rust
|
|
||||||
lintrunner==0.10.7 ; platform_machine == "x86_64"
|
|
||||||
#Description: all about linters!
|
|
||||||
#Pinned versions: 0.10.7
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
rockset==1.0.3
|
|
||||||
#Description: queries Rockset
|
|
||||||
#Pinned versions: 1.0.3
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
ghstack==0.8.0
|
|
||||||
#Description: ghstack tool
|
|
||||||
#Pinned versions: 0.8.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
jinja2==3.1.3
|
|
||||||
#Description: jinja2 template engine
|
|
||||||
#Pinned versions: 3.1.3
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
pytest-cpp==2.3.0
|
|
||||||
#Description: This is used by pytest to invoke C++ tests
|
|
||||||
#Pinned versions: 2.3.0
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
z3-solver==4.12.2.0
|
|
||||||
#Description: The Z3 Theorem Prover Project
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
tensorboard==2.13.0
|
|
||||||
#Description: Also included in .ci/docker/requirements-docs.txt
|
|
||||||
#Pinned versions:
|
|
||||||
#test that import: test_tensorboard
|
|
||||||
|
|
||||||
pywavelets==1.4.1 ; python_version < "3.12"
|
|
||||||
pywavelets==1.5.0 ; python_version >= "3.12"
|
|
||||||
#Description: This is a requirement of scikit-image, we need to pin
|
|
||||||
# it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
|
|
||||||
#Pinned versions: 1.4.1
|
|
||||||
#test that import:
|
|
||||||
|
|
||||||
lxml==5.0.0
|
|
||||||
#Description: This is a requirement of unittest-xml-reporting
|
|
||||||
|
|
||||||
# Python-3.9 binaries
|
|
||||||
@ -1,49 +0,0 @@
|
|||||||
sphinx==5.3.0
|
|
||||||
#Description: This is used to generate PyTorch docs
|
|
||||||
#Pinned versions: 5.3.0
|
|
||||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
|
|
||||||
|
|
||||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
|
||||||
# but it doesn't seem to work and hangs around idly. The initial thought is probably
|
|
||||||
# something related to Docker setup. We can investigate this later
|
|
||||||
sphinxcontrib.katex==0.8.6
|
|
||||||
#Description: This is used to generate PyTorch docs
|
|
||||||
#Pinned versions: 0.8.6
|
|
||||||
|
|
||||||
matplotlib==3.5.3
|
|
||||||
#Description: This is used to generate PyTorch docs
|
|
||||||
#Pinned versions: 3.5.3
|
|
||||||
|
|
||||||
tensorboard==2.13.0
|
|
||||||
#Description: This is used to generate PyTorch docs
|
|
||||||
#Pinned versions: 2.13.0
|
|
||||||
|
|
||||||
breathe==4.34.0
|
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
|
||||||
#Pinned versions: 4.34.0
|
|
||||||
|
|
||||||
exhale==0.2.3
|
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
|
||||||
#Pinned versions: 0.2.3
|
|
||||||
|
|
||||||
docutils==0.16
|
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
|
||||||
#Pinned versions: 0.16
|
|
||||||
|
|
||||||
bs4==0.0.1
|
|
||||||
#Description: This is used to generate PyTorch C++ docs
|
|
||||||
#Pinned versions: 0.0.1
|
|
||||||
|
|
||||||
IPython==8.12.0
|
|
||||||
#Description: This is used to generate PyTorch functorch docs
|
|
||||||
#Pinned versions: 8.12.0
|
|
||||||
|
|
||||||
myst-nb==0.17.2
|
|
||||||
#Description: This is used to generate PyTorch functorch docs
|
|
||||||
#Pinned versions: 0.17.2
|
|
||||||
|
|
||||||
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
|
|
||||||
python-etcd==0.4.5
|
|
||||||
sphinx-copybutton==0.5.0
|
|
||||||
sphinx-panels==0.4.1
|
|
||||||
myst-parser==0.18.1
|
|
||||||
@ -1 +0,0 @@
3.0.0
@@ -1,157 +0,0 @@
ARG UBUNTU_VERSION
ARG CUDA_VERSION
ARG IMAGE_NAME

FROM ${IMAGE_NAME}

ARG UBUNTU_VERSION
ARG CUDA_VERSION

ENV DEBIAN_FRONTEND noninteractive

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
ARG CONDA_CMAKE
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# Install clang
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}

# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
ENV UCC_HOME /usr
ADD ./common/install_ucc.sh install_ucc.sh
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
RUN rm install_ucc.sh

COPY ./common/install_openssl.sh install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
RUN bash ./install_openssl.sh
ENV OPENSSL_DIR /opt/openssl

ARG INDUCTOR_BENCHMARKS
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

ARG TRITON
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton.txt triton.txt
COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
# See https://github.com/pytorch/pytorch/issues/82174
# TODO(sdym@fb.com):
# check if this is needed after full off Xenial migration
ENV CARGO_NET_GIT_FETCH_WITH_CLI true
RUN bash ./install_cache.sh && rm install_cache.sh
ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache

# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh

# Install Open MPI for CUDA
COPY ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
ENV CUDA_PATH /usr/local/cuda

# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

# Install CUDNN
ARG CUDNN_VERSION
ARG CUDA_VERSION
COPY ./common/install_cudnn.sh install_cudnn.sh
RUN if [ "${CUDNN_VERSION}" -eq 8 ]; then bash install_cudnn.sh; fi
RUN rm install_cudnn.sh

# Install CUSPARSELT
ARG CUDA_VERSION
COPY ./common/install_cusparselt.sh install_cusparselt.sh
RUN bash install_cusparselt.sh
RUN rm install_cusparselt.sh

# Delete /usr/local/cuda-11.X/cuda-11.X symlinks
RUN if [ -h /usr/local/cuda-11.6/cuda-11.6 ]; then rm /usr/local/cuda-11.6/cuda-11.6; fi
RUN if [ -h /usr/local/cuda-11.7/cuda-11.7 ]; then rm /usr/local/cuda-11.7/cuda-11.7; fi
RUN if [ -h /usr/local/cuda-12.1/cuda-12.1 ]; then rm /usr/local/cuda-12.1/cuda-12.1; fi

USER jenkins
CMD ["bash"]
@@ -1,113 +0,0 @@
ARG UBUNTU_VERSION

FROM ubuntu:${UBUNTU_VERSION}

ARG UBUNTU_VERSION

ENV DEBIAN_FRONTEND noninteractive

# Set AMD gpu targets to build for
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install clang
ARG LLVMDEV
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}

# Install rocm
ARG ROCM_VERSION
COPY ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh
RUN rm install_rocm.sh
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh
RUN rm install_rocm_magma.sh
ENV ROCM_PATH /opt/rocm
ENV PATH /opt/rocm/bin:$PATH
ENV PATH /opt/rocm/hcc/bin:$PATH
ENV PATH /opt/rocm/hip/bin:$PATH
ENV PATH /opt/rocm/opencl/bin:$PATH
ENV PATH /opt/rocm/llvm/bin:$PATH
ENV MAGMA_HOME /opt/rocm/magma
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

ARG TRITON
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt
COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

USER jenkins
CMD ["bash"]
@@ -1,118 +0,0 @@
ARG UBUNTU_VERSION

FROM ubuntu:${UBUNTU_VERSION}

ARG UBUNTU_VERSION

ENV DEBIAN_FRONTEND noninteractive

ARG CLANG_VERSION

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install clang
ARG LLVMDEV
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ARG DOCS
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
ENV DOCS=$DOCS
COPY requirements-ci.txt requirements-docs.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# Install lcov for C++ code coverage
COPY ./common/install_lcov.sh install_lcov.sh
RUN bash ./install_lcov.sh && rm install_lcov.sh

COPY ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
ENV OPENSSL_DIR /opt/openssl
RUN rm install_openssl.sh

ARG INDUCTOR_BENCHMARKS
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt

ARG TRITON
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
# TODO: will add triton xpu commit
COPY ci_commit_pins/triton.txt triton.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}

# Install XPU Dependencies
ARG BASEKIT_VERSION
COPY ./common/install_xpu.sh install_xpu.sh
RUN bash ./install_xpu.sh && rm install_xpu.sh

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

USER jenkins
CMD ["bash"]
@@ -1,199 +0,0 @@
ARG UBUNTU_VERSION

FROM ubuntu:${UBUNTU_VERSION}

ARG UBUNTU_VERSION

ENV DEBIAN_FRONTEND noninteractive

ARG CLANG_VERSION

# Install common dependencies (so that this step can be cached separately)
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install clang
ARG LLVMDEV
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh

# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ARG DOCS
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
ENV DOCS=$DOCS
COPY requirements-ci.txt requirements-docs.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
RUN if [ -n "${UNINSTALL_DILL}" ]; then pip uninstall -y dill; fi

# Install gcc
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# Install lcov for C++ code coverage
COPY ./common/install_lcov.sh install_lcov.sh
RUN bash ./install_lcov.sh && rm install_lcov.sh

# Install cuda and cudnn
ARG CUDA_VERSION
RUN wget -q https://raw.githubusercontent.com/pytorch/builder/main/common/install_cuda.sh -O install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
ENV UCC_HOME /usr
ADD ./common/install_ucc.sh install_ucc.sh
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
RUN rm install_ucc.sh

# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
RUN rm install_protobuf.sh
ENV INSTALLED_PROTOBUF ${PROTOBUF}

# (optional) Install database packages like LMDB and LevelDB
ARG DB
COPY ./common/install_db.sh install_db.sh
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
RUN rm install_db.sh
ENV INSTALLED_DB ${DB}

# (optional) Install vision packages like OpenCV and ffmpeg
ARG VISION
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}

# (optional) Install Android NDK
ARG ANDROID
ARG ANDROID_NDK
ARG GRADLE_VERSION
COPY ./common/install_android.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
COPY ./android/AndroidManifest.xml AndroidManifest.xml
COPY ./android/build.gradle build.gradle
RUN if [ -n "${ANDROID}" ]; then bash ./install_android.sh; fi
RUN rm install_android.sh cache_vision_models.sh common_utils.sh
RUN rm AndroidManifest.xml
RUN rm build.gradle
ENV INSTALLED_ANDROID ${ANDROID}

# (optional) Install Vulkan SDK
ARG VULKAN_SDK_VERSION
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
RUN rm install_vulkan_sdk.sh

# (optional) Install swiftshader
ARG SWIFTSHADER
COPY ./common/install_swiftshader.sh install_swiftshader.sh
RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
RUN rm install_swiftshader.sh

# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
RUN rm install_cmake.sh

# (optional) Install non-default Ninja version
ARG NINJA_VERSION
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
RUN rm install_ninja.sh

COPY ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
ENV OPENSSL_DIR /opt/openssl
RUN rm install_openssl.sh

ARG INDUCTOR_BENCHMARKS
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt

ARG TRITON
# Install triton, this needs to be done before sccache because the latter will
# try to reach out to S3, which docker build runners don't have access
COPY ./common/install_triton.sh install_triton.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/triton.txt triton.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt

ARG EXECUTORCH
# Build and install executorch
COPY ./common/install_executorch.sh install_executorch.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/executorch.txt executorch.txt
RUN if [ -n "${EXECUTORCH}" ]; then bash ./install_executorch.sh; fi
RUN rm install_executorch.sh common_utils.sh executorch.txt

ARG ONNX
# Install ONNX dependencies
COPY ./common/install_onnx.sh ./common/common_utils.sh ./
RUN if [ -n "${ONNX}" ]; then bash ./install_onnx.sh; fi
RUN rm install_onnx.sh common_utils.sh

# (optional) Build ACL
ARG ACL
COPY ./common/install_acl.sh install_acl.sh
RUN if [ -n "${ACL}" ]; then bash ./install_acl.sh; fi
RUN rm install_acl.sh
ENV INSTALLED_ACL ${ACL}

# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh

# Add jni.h for java host build
COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh

# Install Open MPI for CUDA
COPY ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh

# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

# Install LLVM dev version (Defined in the pytorch/builder github repository)
COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

# AWS specific CUDA build guidance
ENV TORCH_CUDA_ARCH_LIST Maxwell
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
ENV CUDA_PATH /usr/local/cuda

USER jenkins
CMD ["bash"]
@@ -1,14 +0,0 @@
# Jenkins

The scripts in this directory are the entrypoint for testing the ONNX exporter.

The environment variable `BUILD_ENVIRONMENT` is expected to be set to
the build environment you intend to test. It is a hint for the build
and test scripts to configure Caffe2 a certain way and to include or exclude
tests. For Docker images, it equals the name of the image itself, for
example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
built on Jenkins and are used in triggered builds already have this
environment variable set in their manifest. Also see
`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
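For illustration only, a test script in this directory can branch on that variable with ordinary shell pattern matching; the lines below are a minimal hedged sketch, not taken from any particular script here:

# Hypothetical sketch of keying behaviour off BUILD_ENVIRONMENT.
if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]]; then
  echo "running CUDA-specific ONNX exporter tests"
elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
  echo "running ROCm-specific ONNX exporter tests"
else
  echo "running CPU-only ONNX exporter tests"
fi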
Our Jenkins installation is located at https://ci.pytorch.org/jenkins/.
@@ -1,19 +0,0 @@
set -ex

LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
TEST_DIR="$ROOT_DIR/test"
pytest_reports_dir="${TEST_DIR}/test-reports/python"

# Figure out which Python to use
PYTHON="$(which python)"
if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
  PYTHON=$(which "python${BASH_REMATCH[1]}")
fi

if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
  # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
  unset HIP_PLATFORM
fi

mkdir -p "$pytest_reports_dir" || true
@@ -1,15 +0,0 @@
#!/bin/bash

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
  # TODO: This can be removed later once vision is also part of the Docker image
  pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)"
  # JIT C++ extensions require ninja, so put it into PATH.
  export PATH="/var/lib/jenkins/.local/bin:$PATH"
  # NB: The ONNX test is fast (~15m), so it's ok to retry it a few more times to avoid any flaky issue; we
  # need to bring this to the standard PyTorch run_test eventually. The issue will be tracked in
  # https://github.com/pytorch/pytorch/issues/98626
  "$ROOT_DIR/scripts/onnx/test.sh"
fi
@@ -1,42 +0,0 @@
This directory contains scripts for our continuous integration.

One important thing to keep in mind when reading the scripts here is
that they are all based off of Docker images, which we build for each of
the various system configurations we want to run on Jenkins. This means
it is very easy to run these tests yourself:

1. Figure out what Docker image you want. The general template for our
   images looks like
   ``registry.pytorch.org/pytorch/pytorch-$BUILD_ENVIRONMENT:$DOCKER_VERSION``,
   where ``$BUILD_ENVIRONMENT`` is one of the build environments
   enumerated in
   [pytorch-dockerfiles](https://github.com/pytorch/pytorch/blob/master/.ci/docker/build.sh). The Dockerfiles used by Jenkins can be found under the `.ci` [directory](https://github.com/pytorch/pytorch/blob/master/.ci/docker).

2. Run ``docker run -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and
   run one of the scripts in this directory, as sketched below.
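Putting the two steps together, a local run might look roughly like the following sketch; the image tag is a placeholder, not a value this repository ships, so substitute a real ``$BUILD_ENVIRONMENT`` and ``$DOCKER_VERSION`` pair:

# Placeholder tag: pick a real BUILD_ENVIRONMENT/DOCKER_VERSION combination.
DOCKER_IMAGE=registry.pytorch.org/pytorch/pytorch-linux-focal-py3-gcc7:latest

docker run -it -u jenkins "$DOCKER_IMAGE" bash
# ...then, inside the container:
#   git clone --recursive https://github.com/pytorch/pytorch.git
#   cd pytorch && run one of the scripts from this directory, e.g. build.sh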
The Docker images are designed so that any "reasonable" build commands
will work; if you look in [build.sh](build.sh) you will see that it is a
very simple script. This is intentional. Idiomatic build instructions
should work inside all of our Docker images. You can tweak the commands
however you need (e.g., in case you want to rebuild with DEBUG, or rerun
the build with higher verbosity, etc.).

We have to do some work to make this so. Here is a summary of the
mechanisms we use:

- We install binaries to directories like `/usr/local/bin` which
  are automatically part of your PATH.

- We add entries to the PATH using Docker ENV variables (so
  they apply when you enter Docker) and `/etc/environment` (so they
  continue to apply even if you sudo), instead of modifying
  `PATH` in our build scripts.

- We use `/etc/ld.so.conf.d` to register directories containing
  shared libraries, instead of modifying `LD_LIBRARY_PATH` in our
  build scripts.

- We reroute well known paths like `/usr/bin/gcc` to alternate
  implementations with `update-alternatives`, instead of setting
  `CC` and `CXX` in our implementations (see the sketch below).
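As an illustration of that last point, rerouting `/usr/bin/gcc` with `update-alternatives` looks roughly like the following; the compiler version and priority are made-up example values, not what the images actually register:

# Register gcc-9 as an alternative for /usr/bin/gcc with priority 90,
# then select it explicitly (example values only).
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90
sudo update-alternatives --set gcc /usr/bin/gcc-9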
@@ -1,367 +0,0 @@
#!/bin/bash

set -ex

# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.)

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"

if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
  exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
fi

echo "Python version:"
python --version

echo "GCC version:"
gcc --version

echo "CMake version:"
cmake --version

echo "Environment variables:"
env

if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
  # Use jemalloc during compilation to mitigate https://github.com/pytorch/pytorch/issues/116289
  export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.2
  echo "NVCC version:"
  nvcc --version
fi

if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
  if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
    # TODO: there is a linking issue when building with UCC using clang;
    # disable it for now, to be fixed later.
    # TODO: disable UCC temporarily to enable CUDA 12.1 in CI
    export USE_UCC=1
    export USE_SYSTEM_UCC=1
  fi
fi

if [[ ${BUILD_ENVIRONMENT} == *"caffe2"* ]]; then
  echo "Caffe2 build is ON"
  export BUILD_CAFFE2=ON
fi

if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
  export ATEN_THREADING=TBB
  export USE_TBB=1
elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
  export ATEN_THREADING=NATIVE
fi

# Enable LLVM dependency for TensorExpr testing
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  export USE_LLVM=/opt/rocm/llvm
  export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm
else
  export USE_LLVM=/opt/llvm
  export LLVM_DIR=/opt/llvm/lib/cmake/llvm
fi

if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then
  # To build test_edge_op_registration
  export BUILD_EXECUTORCH=ON
  export USE_CUDA=0
fi

if ! which conda; then
  # In ROCm CIs, we are doing cross compilation on build machines with
  # intel cpu and later run tests on machines with amd cpu.
  # Also leave out two builds to make sure non-mkldnn builds still work.
  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
    export USE_MKLDNN=1
  else
    export USE_MKLDNN=0
  fi
else
  export CMAKE_PREFIX_PATH=/opt/conda

  # Workaround required for MKL library linkage
  # https://github.com/pytorch/pytorch/issues/119557
  if [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then
    export CMAKE_LIBRARY_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/lib/"
    export CMAKE_INCLUDE_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/include/"
  fi
fi

if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
  export USE_MKLDNN=1
  export USE_MKLDNN_ACL=1
  export ACL_ROOT_DIR=/ComputeLibrary
fi

if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
  POSSIBLE_JAVA_HOMES=()
  POSSIBLE_JAVA_HOMES+=(/usr/local)
  POSSIBLE_JAVA_HOMES+=(/usr/lib/jvm/java-8-openjdk-amd64)
  POSSIBLE_JAVA_HOMES+=(/Library/Java/JavaVirtualMachines/*.jdk/Contents/Home)
  # Add the Windows-specific JNI
  POSSIBLE_JAVA_HOMES+=("$PWD/.circleci/windows-jni/")
  for JH in "${POSSIBLE_JAVA_HOMES[@]}" ; do
    if [[ -e "$JH/include/jni.h" ]] ; then
      # Skip if we're not on Windows but haven't found a JAVA_HOME
      if [[ "$JH" == "$PWD/.circleci/windows-jni/" && "$OSTYPE" != "msys" ]] ; then
        break
      fi
      echo "Found jni.h under $JH"
      export JAVA_HOME="$JH"
      export BUILD_JNI=ON
      break
    fi
  done
  if [ -z "$JAVA_HOME" ]; then
    echo "Did not find jni.h"
  fi
fi

# Use special scripts for Android builds
if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
  export ANDROID_NDK=/opt/ndk
  build_args=()
  if [[ "${BUILD_ENVIRONMENT}" == *-arm-v7a* ]]; then
    build_args+=("-DANDROID_ABI=armeabi-v7a")
  elif [[ "${BUILD_ENVIRONMENT}" == *-arm-v8a* ]]; then
    build_args+=("-DANDROID_ABI=arm64-v8a")
  elif [[ "${BUILD_ENVIRONMENT}" == *-x86_32* ]]; then
    build_args+=("-DANDROID_ABI=x86")
  elif [[ "${BUILD_ENVIRONMENT}" == *-x86_64* ]]; then
    build_args+=("-DANDROID_ABI=x86_64")
  fi
  if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
    build_args+=("-DUSE_VULKAN=ON")
  fi
  build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
  exec ./scripts/build_android.sh "${build_args[@]}" "$@"
fi

if [[ "$BUILD_ENVIRONMENT" != *android* && "$BUILD_ENVIRONMENT" == *vulkan* ]]; then
  export USE_VULKAN=1
  # shellcheck disable=SC1091
  source /var/lib/jenkins/vulkansdk/setup-env.sh
fi

if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
  # hcc used to run out of memory, silently exiting without stopping
  # the build process, leaving undefined symbols in the shared lib,
  # causing undefined symbol errors when later running tests.
  # We used to set MAX_JOBS to 4 to avoid this, but it is no longer an issue.
  if [ -z "$MAX_JOBS" ]; then
    export MAX_JOBS=$(($(nproc) - 1))
  fi

  if [[ -n "$CI" && -z "$PYTORCH_ROCM_ARCH" ]]; then
    # Set ROCM_ARCH to gfx906 for CI builds, if user doesn't override.
    echo "Limiting PYTORCH_ROCM_ARCH to gfx906 for CI builds"
    export PYTORCH_ROCM_ARCH="gfx906"
  fi

  # hipify sources
  python tools/amd_build/build_amd.py
fi

if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
  # shellcheck disable=SC1091
  source /opt/intel/oneapi/compiler/latest/env/vars.sh
  export USE_XPU=1
fi

# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
if [ -z "$MAX_JOBS" ]; then
  if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
    export MAX_JOBS=$(($(nproc) - 1))
  fi
fi

# TORCH_CUDA_ARCH_LIST must be passed from an environment variable
if [[ "$BUILD_ENVIRONMENT" == *cuda* && -z "$TORCH_CUDA_ARCH_LIST" ]]; then
  echo "TORCH_CUDA_ARCH_LIST must be defined"
  exit 1
fi

# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
# memory to build and will OOM
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ "$TORCH_CUDA_ARCH_LIST" == *"8.6"* || "$TORCH_CUDA_ARCH_LIST" == *"8.0"* ]]; then
  echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM"
  echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage"
  export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
fi

if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
  export CC=clang
  export CXX=clang++
fi

if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
  export LDSHARED="clang --shared"
  export USE_CUDA=0
  export USE_ASAN=1
  export UBSAN_FLAGS="-fno-sanitize-recover=all;-fno-sanitize=float-divide-by-zero;-fno-sanitize=float-cast-overflow"
  unset USE_LLVM
fi

if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then
  export USE_PER_OPERATOR_HEADERS=0
fi

if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
  export USE_PRECOMPILED_HEADERS=1
fi

if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build* ]]; then
  export USE_GLOO_WITH_OPENSSL=ON
fi

if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
  export BUILD_STATIC_RUNTIME_BENCHMARK=ON
fi

WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
sudo chown -R jenkins /var/lib/jenkins/workspace
git config --global --add safe.directory /var/lib/jenkins/workspace

if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
  set -e

  get_bazel
  install_sccache_nvcc_for_bazel

  # Leave 1 CPU free and use only up to 80% of memory to reduce the chance of crashing
  # the runner
  BAZEL_MEM_LIMIT="--local_ram_resources=HOST_RAM*.8"
  BAZEL_CPU_LIMIT="--local_cpu_resources=HOST_CPUS-1"

  if [[ "$CUDA_VERSION" == "cpu" ]]; then
    # Build torch, the Python module, and tests for CPU-only
    tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" --config=cpu-only :torch :torch/_C.so :all_tests
  else
    tools/bazel build --config=no-tty "${BAZEL_MEM_LIMIT}" "${BAZEL_CPU_LIMIT}" //...
  fi
else
  # check that setup.py would fail with bad arguments
  echo "The next three invocations are expected to fail with invalid command error messages."
  ( ! get_exit_code python setup.py bad_argument )
  ( ! get_exit_code python setup.py clean] )
  ( ! get_exit_code python setup.py clean bad_argument )

  if [[ "$BUILD_ENVIRONMENT" != *libtorch* ]]; then
    # rocm builds fail when WERROR=1
    # XLA test build fails when WERROR=1
    # set only when building other architectures
    # or building non-XLA tests.
    if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
          "$BUILD_ENVIRONMENT" != *xla* ]]; then
      if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
        # Install numpy-2.0 release candidate for builds
        # Which should be backward compatible with Numpy-1.X
        python -mpip install --pre numpy==2.0.0b1
      fi
      WERROR=1 python setup.py bdist_wheel
    else
      python setup.py bdist_wheel
    fi
    pip_install_whl "$(echo dist/*.whl)"

    # TODO: I'm not sure why, but somehow we lose verbose commands
    set -x

    assert_git_not_dirty
    # Copy ninja build logs to dist folder
    mkdir -p dist
    if [ -f build/.ninja_log ]; then
      cp build/.ninja_log dist
    fi

    if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
      # remove sccache wrappers post-build; runtime compilation of MIOpen kernels does not yet fully support them
      sudo rm -f /opt/cache/bin/cc
      sudo rm -f /opt/cache/bin/c++
      sudo rm -f /opt/cache/bin/gcc
      sudo rm -f /opt/cache/bin/g++
      pushd /opt/rocm/llvm/bin
      if [[ -d original ]]; then
        sudo mv original/clang .
        sudo mv original/clang++ .
      fi
      sudo rm -rf original
      popd
    fi

    CUSTOM_TEST_ARTIFACT_BUILD_DIR=${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}
    CUSTOM_TEST_USE_ROCM=$([[ "$BUILD_ENVIRONMENT" == *rocm* ]] && echo "ON" || echo "OFF")
    CUSTOM_TEST_MODULE_PATH="${PWD}/cmake/public"
    mkdir -pv "${CUSTOM_TEST_ARTIFACT_BUILD_DIR}"

    # Build custom operator tests.
    CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
    CUSTOM_OP_TEST="$PWD/test/custom_operator"
    python --version
    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
    mkdir -p "$CUSTOM_OP_BUILD"
    pushd "$CUSTOM_OP_BUILD"
    cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
    assert_git_not_dirty

    # Build jit hook tests
    JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
    JIT_HOOK_TEST="$PWD/test/jit_hooks"
    python --version
    SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
    mkdir -p "$JIT_HOOK_BUILD"
    pushd "$JIT_HOOK_BUILD"
    cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
    assert_git_not_dirty

    # Build custom backend tests.
    CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
    CUSTOM_BACKEND_TEST="$PWD/test/custom_backend"
    python --version
    mkdir -p "$CUSTOM_BACKEND_BUILD"
    pushd "$CUSTOM_BACKEND_BUILD"
    cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" -DPYTHON_EXECUTABLE="$(which python)" \
          -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM"
    make VERBOSE=1
    popd
    assert_git_not_dirty
  else
    # Test no-Python build
    echo "Building libtorch"

    # This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
    # is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
    # 16 CPUs
    MAX_JOBS=$(nproc --ignore=4)
    export MAX_JOBS

    # NB: Install outside of source directory (at the same level as the root
    # pytorch folder) so that it doesn't get cleaned away prior to docker push.
    BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
    mkdir -p ../cpp-build/caffe2
    pushd ../cpp-build/caffe2
    WERROR=1 VERBOSE=1 DEBUG=1 python "$BUILD_LIBTORCH_PY"
    popd
  fi
fi

if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; then
  # export test times so that potential sharded tests that'll branch off this build will use consistent data
  # don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
  python tools/stats/export_test_times.py
fi

print_sccache_stats

sudo chown -R "$WORKSPACE_ORIGINAL_OWNER_ID" /var/lib/jenkins/workspace
@@ -1,59 +0,0 @@
#!/bin/bash
# Required environment variables:
#   $BUILD_ENVIRONMENT (should be set by your Docker image)

if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
  # Save the absolute path in case later we chdir (as occurs in the gpu perf test)
  script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"

  if which sccache > /dev/null; then
    # Save sccache logs to file
    sccache --stop-server > /dev/null 2>&1 || true
    rm -f ~/sccache_error.log || true

    function sccache_epilogue() {
      echo "::group::Sccache Compilation Log"
      echo '=================== sccache compilation log ==================='
      python "$script_dir/print_sccache_log.py" ~/sccache_error.log 2>/dev/null || true
      echo '=========== If your build fails, please take a look at the log above for possible reasons ==========='
      sccache --show-stats
      sccache --stop-server || true
      echo "::endgroup::"
    }

    # Register the function here so that the error log can be printed even when
    # sccache fails to start, i.e. timeout error
    trap_add sccache_epilogue EXIT

    if [[ -n "${SKIP_SCCACHE_INITIALIZATION:-}" ]]; then
      # sccache --start-server seems to hang forever on self hosted runners for GHA
      # so let's just go ahead and skip the --start-server altogether since it seems
      # as though sccache still gets used even when the sccache server isn't started
      # explicitly
      echo "Skipping sccache server initialization, setting environment variables"
      export SCCACHE_IDLE_TIMEOUT=0
      export SCCACHE_ERROR_LOG=~/sccache_error.log
      export RUST_LOG=sccache::server=error
    elif [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
      SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
    else
      # increasing SCCACHE_IDLE_TIMEOUT so that extension_backend_test.cpp can build after this PR:
      # https://github.com/pytorch/pytorch/pull/16645
      SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 RUST_LOG=sccache::server=error sccache --start-server
    fi

    # Report sccache stats for easier debugging. It's ok if this command
    # times out and fails on MacOS
    sccache --zero-stats || true
  fi

  if which ccache > /dev/null; then
    # Report ccache stats for easier debugging
    ccache --zero-stats
    ccache --show-stats
    function ccache_epilogue() {
      ccache --show-stats
    }
    trap_add ccache_epilogue EXIT
  fi
fi
@@ -1,24 +0,0 @@
#!/bin/bash

# Common setup for all Jenkins scripts
# shellcheck source=./common_utils.sh
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
set -ex

# Required environment variables:
#   $BUILD_ENVIRONMENT (should be set by your Docker image)

# Figure out which Python to use for ROCm
if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
  # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
  unset HIP_PLATFORM
  export PYTORCH_TEST_WITH_ROCM=1
  # temporary to locate some kernel issues on the CI nodes
  export HSAKMT_DEBUG_LEVEL=4
  # improve rccl performance for distributed tests
  export HSA_FORCE_FINE_GRAIN_PCIE=1
fi

# TODO: Re-enable libtorch testing for MacOS, see https://github.com/pytorch/pytorch/issues/62598
# shellcheck disable=SC2034
BUILD_TEST_LIBTORCH=0
@ -1,240 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Common util **functions** that can be sourced in other scripts.
|
|
||||||
|
|
||||||
# note: printf is used instead of echo to avoid backslash
|
|
||||||
# processing and to properly handle values that begin with a '-'.
|
|
||||||
|
|
||||||
log() { printf '%s\n' "$*"; }
|
|
||||||
error() { log "ERROR: $*" >&2; }
|
|
||||||
fatal() { error "$@"; exit 1; }
|
|
||||||
|
|
||||||
retry () {
|
|
||||||
"$@" || (sleep 10 && "$@") || (sleep 20 && "$@") || (sleep 40 && "$@")
|
|
||||||
}
|
|
||||||
|
|
||||||
# compositional trap taken from https://stackoverflow.com/a/7287873/23845
|
|
||||||
# appends a command to a trap
|
|
||||||
#
|
|
||||||
# - 1st arg: code to add
|
|
||||||
# - remaining args: names of traps to modify
|
|
||||||
#
|
|
||||||
trap_add() {
|
|
||||||
trap_add_cmd=$1; shift || fatal "${FUNCNAME[0]} usage error"
|
|
||||||
for trap_add_name in "$@"; do
|
|
||||||
trap -- "$(
|
|
||||||
# helper fn to get existing trap command from output
|
|
||||||
# of trap -p
|
|
||||||
extract_trap_cmd() { printf '%s\n' "$3"; }
|
|
||||||
# print existing trap command with newline
|
|
||||||
eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
|
|
||||||
# print the new trap command
|
|
||||||
printf '%s\n' "${trap_add_cmd}"
|
|
||||||
)" "${trap_add_name}" \
|
|
||||||
|| fatal "unable to add to trap ${trap_add_name}"
|
|
||||||
done
|
|
||||||
}
|
|
||||||
# set the trace attribute for the above function. this is
|
|
||||||
# required to modify DEBUG or RETURN traps because functions don't
|
|
||||||
# inherit them unless the trace attribute is set
|
|
||||||
declare -f -t trap_add
|
|
||||||
|
|
||||||
function assert_git_not_dirty() {
|
|
||||||
# TODO: we should add an option to `build_amd.py` that reverts the repo to
|
|
||||||
# an unmodified state.
|
|
||||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]] && [[ "$BUILD_ENVIRONMENT" != *xla* ]] ; then
|
|
||||||
git_status=$(git status --porcelain | grep -v '?? third_party' || true)
|
|
||||||
if [[ $git_status ]]; then
|
|
||||||
echo "Build left local git repository checkout dirty"
|
|
||||||
echo "git status --porcelain:"
|
|
||||||
echo "${git_status}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function pip_install_whl() {
|
|
||||||
# This is used to install PyTorch and other build artifacts wheel locally
|
|
||||||
# without using any network connection
|
|
||||||
python3 -mpip install --no-index --no-deps "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
function pip_install() {
|
|
||||||
# retry 3 times
|
|
||||||
# old versions of pip don't have the "--progress-bar" flag
|
|
||||||
pip install --progress-bar off "$@" || pip install --progress-bar off "$@" || pip install --progress-bar off "$@" ||\
|
|
||||||
pip install "$@" || pip install "$@" || pip install "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
function pip_uninstall() {
|
|
||||||
# uninstall 2 times
|
|
||||||
pip uninstall -y "$@" || pip uninstall -y "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
function get_exit_code() {
|
|
||||||
set +e
|
|
||||||
"$@"
|
|
||||||
retcode=$?
|
|
||||||
set -e
|
|
||||||
return $retcode
|
|
||||||
}
|
|
||||||
|
|
||||||
function get_bazel() {
|
|
||||||
# Download and use the cross-platform, dependency-free Python
|
|
||||||
# version of Bazelisk to fetch the platform specific version of
|
|
||||||
# Bazel to use from .bazelversion.
|
|
||||||
retry curl --location --output tools/bazel \
|
|
||||||
https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.16.0/bazelisk.py
|
|
||||||
shasum --algorithm=1 --check \
|
|
||||||
<(echo 'd4369c3d293814d3188019c9f7527a948972d9f8 tools/bazel')
|
|
||||||
chmod u+x tools/bazel
|
|
||||||
}
|
|
||||||
|
|
||||||
# This function is bazel specific because of the bug
|
|
||||||
# in the bazel that requires some special paths massaging
|
|
||||||
# as a workaround. See
|
|
||||||
# https://github.com/bazelbuild/bazel/issues/10167
|
|
||||||
function install_sccache_nvcc_for_bazel() {
|
|
||||||
sudo mv /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc-real
|
|
||||||
|
|
||||||
# Write the `/usr/local/cuda/bin/nvcc`
|
|
||||||
cat << EOF | sudo tee /usr/local/cuda/bin/nvcc
|
|
||||||
#!/bin/sh
|
|
||||||
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
|
|
||||||
exec sccache /usr/local/cuda/bin/nvcc "\$@"
|
|
||||||
else
|
|
||||||
exec external/local_cuda/cuda/bin/nvcc-real "\$@"
|
|
||||||
fi
|
|
||||||
EOF
|
|
||||||
|
|
||||||
sudo chmod +x /usr/local/cuda/bin/nvcc
|
|
||||||
}

function install_monkeytype {
  # Install MonkeyType
  pip_install MonkeyType
}


function get_pinned_commit() {
  cat .github/ci_commit_pins/"${1}".txt
}

function install_torchaudio() {
  local commit
  commit=$(get_pinned_commit audio)
  if [[ "$1" == "cuda" ]]; then
    # TODO: This is better passed as a parameter from the _linux-test workflow
    # so that it can be consistent with what is set in the build
    TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${commit}"
  else
    pip_install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${commit}"
  fi

}

function install_torchtext() {
  local data_commit
  local text_commit
  data_commit=$(get_pinned_commit data)
  text_commit=$(get_pinned_commit text)
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/data.git@${data_commit}"
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/text.git@${text_commit}"
}

function install_torchvision() {
  local orig_preload
  local commit
  commit=$(get_pinned_commit vision)
  orig_preload=${LD_PRELOAD}
  if [ -n "${LD_PRELOAD}" ]; then
    # Silence dlerror to work around a glibc ASAN bug, see https://sourceware.org/bugzilla/show_bug.cgi?id=27653#c9
    echo 'char* dlerror(void) { return "";}'|gcc -fpic -shared -o "${HOME}/dlerror.so" -x c -
    LD_PRELOAD=${orig_preload}:${HOME}/dlerror.so
  fi
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${commit}"
  if [ -n "${LD_PRELOAD}" ]; then
    LD_PRELOAD=${orig_preload}
  fi
}

function install_tlparse() {
  pip_install --user "tlparse==0.3.7"
  PATH="$(python -m site --user-base)/bin:$PATH"
}

function install_torchrec_and_fbgemm() {
  local torchrec_commit
  torchrec_commit=$(get_pinned_commit torchrec)
  local fbgemm_commit
  fbgemm_commit=$(get_pinned_commit fbgemm)
  pip_uninstall torchrec-nightly
  pip_uninstall fbgemm-gpu-nightly
  pip_install setuptools-git-versioning scikit-build pyre-extensions
  # See https://github.com/pytorch/pytorch/issues/106971
  CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
  pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
}

function clone_pytorch_xla() {
  if [[ ! -d ./xla ]]; then
    git clone --recursive --quiet https://github.com/pytorch/xla.git
    pushd xla
    # pin the xla hash so that we don't get broken by changes to xla
    git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
    git submodule sync
    git submodule update --init --recursive
    popd
  fi
}

function checkout_install_torchdeploy() {
  local commit
  commit=$(get_pinned_commit multipy)
  pushd ..
  git clone --recurse-submodules https://github.com/pytorch/multipy.git
  pushd multipy
  git checkout "${commit}"
  python multipy/runtime/example/generate_examples.py
  BUILD_CUDA_TESTS=1 pip install -e .
  popd
  popd
}

function test_torch_deploy() {
  pushd ..
  pushd multipy
  ./multipy/runtime/build/test_deploy
  ./multipy/runtime/build/test_deploy_gpu
  popd
  popd
}

function checkout_install_torchbench() {
  local commit
  commit=$(get_pinned_commit torchbench)
  git clone https://github.com/pytorch/benchmark torchbench
  pushd torchbench
  git checkout "$commit"

  if [ "$1" ]; then
    python install.py --continue_on_fail models "$@"
  else
    # Occasionally the installation may fail on one model, but it is ok to continue
    # to install and test the other models
    python install.py --continue_on_fail
  fi
  popd
}
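
# Usage sketch (the model names are hypothetical): `checkout_install_torchbench BERT_pytorch resnet50`
# installs only the listed models, while a bare `checkout_install_torchbench` installs every
# model install.py knows about, continuing past per-model failures.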

function print_sccache_stats() {
  echo 'PyTorch Build Statistics'
  sccache --show-stats

  if [[ -n "${OUR_GITHUB_JOB_ID}" ]]; then
    sccache --show-stats --stats-format json | jq .stats \
      > "sccache-stats-${BUILD_ENVIRONMENT}-${OUR_GITHUB_JOB_ID}.json"
  else
    echo "env var OUR_GITHUB_JOB_ID not set, will not write sccache stats to json"
  fi
}
@ -1,124 +0,0 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
from tempfile import mkdtemp
|
|
||||||
|
|
||||||
from cryptography import x509
|
|
||||||
from cryptography.hazmat.primitives import hashes, serialization
|
|
||||||
from cryptography.hazmat.primitives.asymmetric import rsa
|
|
||||||
from cryptography.x509.oid import NameOID
|
|
||||||
|
|
||||||
temp_dir = mkdtemp()
|
|
||||||
print(temp_dir)
|
|
||||||
|
|
||||||
|
|
||||||
def genrsa(path):
|
|
||||||
key = rsa.generate_private_key(
|
|
||||||
public_exponent=65537,
|
|
||||||
key_size=2048,
|
|
||||||
)
|
|
||||||
with open(path, "wb") as f:
|
|
||||||
f.write(
|
|
||||||
key.private_bytes(
|
|
||||||
encoding=serialization.Encoding.PEM,
|
|
||||||
format=serialization.PrivateFormat.TraditionalOpenSSL,
|
|
||||||
encryption_algorithm=serialization.NoEncryption(),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return key
|
|
||||||
|
|
||||||
|
|
||||||
def create_cert(path, C, ST, L, O, key):
|
|
||||||
subject = issuer = x509.Name(
|
|
||||||
[
|
|
||||||
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
|
|
||||||
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
|
|
||||||
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
|
|
||||||
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
cert = (
|
|
||||||
x509.CertificateBuilder()
|
|
||||||
.subject_name(subject)
|
|
||||||
.issuer_name(issuer)
|
|
||||||
.public_key(key.public_key())
|
|
||||||
.serial_number(x509.random_serial_number())
|
|
||||||
.not_valid_before(datetime.utcnow())
|
|
||||||
.not_valid_after(
|
|
||||||
# Our certificate will be valid for 10 days
|
|
||||||
datetime.utcnow()
|
|
||||||
+ timedelta(days=10)
|
|
||||||
)
|
|
||||||
.add_extension(
|
|
||||||
x509.BasicConstraints(ca=True, path_length=None),
|
|
||||||
critical=True,
|
|
||||||
)
|
|
||||||
.sign(key, hashes.SHA256())
|
|
||||||
)
|
|
||||||
# Write our certificate out to disk.
|
|
||||||
with open(path, "wb") as f:
|
|
||||||
f.write(cert.public_bytes(serialization.Encoding.PEM))
|
|
||||||
return cert
|
|
||||||
|
|
||||||
|
|
||||||
def create_req(path, C, ST, L, O, key):
|
|
||||||
csr = (
|
|
||||||
x509.CertificateSigningRequestBuilder()
|
|
||||||
.subject_name(
|
|
||||||
x509.Name(
|
|
||||||
[
|
|
||||||
# Provide various details about who we are.
|
|
||||||
x509.NameAttribute(NameOID.COUNTRY_NAME, C),
|
|
||||||
x509.NameAttribute(NameOID.STATE_OR_PROVINCE_NAME, ST),
|
|
||||||
x509.NameAttribute(NameOID.LOCALITY_NAME, L),
|
|
||||||
x509.NameAttribute(NameOID.ORGANIZATION_NAME, O),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.sign(key, hashes.SHA256())
|
|
||||||
)
|
|
||||||
with open(path, "wb") as f:
|
|
||||||
f.write(csr.public_bytes(serialization.Encoding.PEM))
|
|
||||||
return csr
|
|
||||||
|
|
||||||
|
|
||||||
def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
|
|
||||||
cert = (
|
|
||||||
x509.CertificateBuilder()
|
|
||||||
.subject_name(csr_cert.subject)
|
|
||||||
.issuer_name(ca_cert.subject)
|
|
||||||
.public_key(csr_cert.public_key())
|
|
||||||
.serial_number(x509.random_serial_number())
|
|
||||||
.not_valid_before(datetime.utcnow())
|
|
||||||
.not_valid_after(
|
|
||||||
# Our certificate will be valid for 10 days
|
|
||||||
datetime.utcnow()
|
|
||||||
+ timedelta(days=10)
|
|
||||||
# Sign our certificate with our private key
|
|
||||||
)
|
|
||||||
.sign(private_ca_key, hashes.SHA256())
|
|
||||||
)
|
|
||||||
with open(path, "wb") as f:
|
|
||||||
f.write(cert.public_bytes(serialization.Encoding.PEM))
|
|
||||||
return cert
|
|
||||||
|
|
||||||
|
|
||||||
ca_key = genrsa(temp_dir + "/ca.key")
|
|
||||||
ca_cert = create_cert(
|
|
||||||
temp_dir + "/ca.pem",
|
|
||||||
"US",
|
|
||||||
"New York",
|
|
||||||
"New York",
|
|
||||||
"Gloo Certificate Authority",
|
|
||||||
ca_key,
|
|
||||||
)
|
|
||||||
|
|
||||||
pkey = genrsa(temp_dir + "/pkey.key")
|
|
||||||
csr = create_req(
|
|
||||||
temp_dir + "/csr.csr",
|
|
||||||
"US",
|
|
||||||
"California",
|
|
||||||
"San Francisco",
|
|
||||||
"Gloo Testing Company",
|
|
||||||
pkey,
|
|
||||||
)
|
|
||||||
|
|
||||||
cert = sign_certificate_request(temp_dir + "/cert.pem", csr, ca_cert, ca_key)
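
# A minimal sanity check of the chain generated above (a sketch, not part of the original
# script): load the issued certificate back and verify the CA's signature over it with the
# same `cryptography` primitives already imported here, plus the RSA padding module.
from cryptography.hazmat.primitives.asymmetric import padding

with open(temp_dir + "/cert.pem", "rb") as f:
    issued = x509.load_pem_x509_certificate(f.read())
assert issued.issuer == ca_cert.subject  # signed by the Gloo Certificate Authority
ca_key.public_key().verify(
    issued.signature,
    issued.tbs_certificate_bytes,
    padding.PKCS1v15(),  # CertificateBuilder.sign() uses PKCS#1 v1.5 for RSA keys
    issued.signature_hash_algorithm,
)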
|
|
||||||
@ -1,6 +0,0 @@
#!/bin/bash

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

docker build -t pytorch .
@ -1,9 +0,0 @@
#!/bin/bash

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

echo "Testing pytorch docs"

cd docs
make doctest
@ -1,40 +0,0 @@
#!/bin/bash

# This is where the local pytorch install in the docker image is located
pt_checkout="/var/lib/jenkins/workspace"
source "$pt_checkout/.ci/pytorch/common_utils.sh"
echo "functorch_doc_push_script.sh: Invoked with $*"

set -ex

version=${DOCS_VERSION:-nightly}
echo "version: $version"

# Build functorch docs
pushd $pt_checkout/functorch/docs
make html
popd

git clone https://github.com/pytorch/functorch -b gh-pages --depth 1 functorch_ghpages
pushd functorch_ghpages

if [ "$version" == "main" ]; then
  version=nightly
fi

git rm -rf "$version" || true
mv "$pt_checkout/functorch/docs/build/html" "$version"

git add "$version" || true
git status
git config user.email "soumith+bot@pytorch.org"
git config user.name "pytorchbot"
# If there aren't changes, don't make a commit; push is no-op
git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true
git status

if [[ "${WITH_PUSH:-}" == true ]]; then
  git push -u origin gh-pages
fi

popd
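
# Typical invocation sketch (path and values are illustrative, not taken from CI config):
#   DOCS_VERSION=main WITH_PUSH=true .ci/pytorch/functorch_doc_push_script.sh
# With DOCS_VERSION=main the built docs land in the "nightly" directory of the gh-pages
# branch; any other value is used as the directory name as-is.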
@ -1,92 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# shellcheck disable=SC2034
|
|
||||||
# shellcheck source=./macos-common.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
|
|
||||||
# shellcheck source=./common-build.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
|
|
||||||
|
|
||||||
# Build PyTorch
|
|
||||||
if [ -z "${CI}" ]; then
|
|
||||||
export DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer
|
|
||||||
fi
|
|
||||||
|
|
||||||
# This helper function wraps calls to binaries with sccache, but only if they're not already wrapped with sccache.
|
|
||||||
# For example, `clang` will be `sccache clang`, but `sccache clang` will not become `sccache sccache clang`.
|
|
||||||
# The way this is done is by detecting the command of the parent pid of the current process and checking whether
|
|
||||||
# that is sccache, and wrapping sccache around the process if its parent is not already sccache.
|
|
||||||
function write_sccache_stub() {
|
|
||||||
output=$1
|
|
||||||
binary=$(basename "${output}")
|
|
||||||
|
|
||||||
printf "#!/bin/sh\nif [ \$(ps auxc \$(ps auxc -o ppid \$\$ | grep \$\$ | rev | cut -d' ' -f1 | rev) | tr '\\\\n' ' ' | rev | cut -d' ' -f2 | rev) != sccache ]; then\n exec sccache %s \"\$@\"\nelse\n exec %s \"\$@\"\nfi" "$(which "${binary}")" "$(which "${binary}")" > "${output}"
|
|
||||||
chmod a+x "${output}"
|
|
||||||
}
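
# For reference, the stub written by write_sccache_stub for clang is roughly equivalent to
# the following (a readable sketch of the printf output above, with `which clang` already
# resolved; the exact parent-process detection pipeline is kept only in the real stub):
#
#   #!/bin/sh
#   if [ "<command name of the parent process>" != sccache ]; then
#     exec sccache /usr/bin/clang "$@"
#   else
#     exec /usr/bin/clang "$@"
#   fi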
|
|
||||||
|
|
||||||
if which sccache > /dev/null; then
|
|
||||||
# Create temp directory for sccache shims
|
|
||||||
tmp_dir=$(mktemp -d)
|
|
||||||
trap 'rm -rfv ${tmp_dir}' EXIT
|
|
||||||
write_sccache_stub "${tmp_dir}/clang++"
|
|
||||||
write_sccache_stub "${tmp_dir}/clang"
|
|
||||||
|
|
||||||
export PATH="${tmp_dir}:$PATH"
|
|
||||||
fi
|
|
||||||
|
|
||||||
cross_compile_arm64() {
|
|
||||||
# Cross compilation for arm64
|
|
||||||
# Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
|
|
||||||
# that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
|
|
||||||
USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
|
|
||||||
}
|
|
||||||
|
|
||||||
compile_arm64() {
|
|
||||||
# Compilation for arm64
|
|
||||||
# TODO: Compile with OpenMP support (but this causes CI regressions, as the cross-compilation was done with OpenMP disabled)
|
|
||||||
USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
|
|
||||||
}
|
|
||||||
|
|
||||||
compile_x86_64() {
|
|
||||||
USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel --plat-name=macosx_10_9_x86_64
|
|
||||||
}
|
|
||||||
|
|
||||||
build_lite_interpreter() {
|
|
||||||
echo "Testing libtorch (lite interpreter)."
|
|
||||||
|
|
||||||
CPP_BUILD="$(pwd)/../cpp_build"
|
|
||||||
# Ensure the removal of the tmp directory
|
|
||||||
trap 'rm -rfv ${CPP_BUILD}' EXIT
|
|
||||||
rm -rf "${CPP_BUILD}"
|
|
||||||
mkdir -p "${CPP_BUILD}/caffe2"
|
|
||||||
|
|
||||||
# It looks like libtorch needs to be built in the "${CPP_BUILD}/caffe2" folder.
|
|
||||||
BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
|
|
||||||
pushd "${CPP_BUILD}/caffe2" || exit
|
|
||||||
VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}"
|
|
||||||
popd || exit
|
|
||||||
|
|
||||||
"${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime"
|
|
||||||
}
|
|
||||||
|
|
||||||
print_cmake_info
|
|
||||||
|
|
||||||
if [[ ${BUILD_ENVIRONMENT} = *arm64* ]]; then
|
|
||||||
if [[ $(uname -m) == "arm64" ]]; then
|
|
||||||
compile_arm64
|
|
||||||
else
|
|
||||||
cross_compile_arm64
|
|
||||||
fi
|
|
||||||
elif [[ ${BUILD_ENVIRONMENT} = *lite-interpreter* ]]; then
|
|
||||||
export BUILD_LITE_INTERPRETER=1
|
|
||||||
build_lite_interpreter
|
|
||||||
else
|
|
||||||
compile_x86_64
|
|
||||||
fi
|
|
||||||
|
|
||||||
if which sccache > /dev/null; then
|
|
||||||
print_sccache_stats
|
|
||||||
fi
|
|
||||||
|
|
||||||
python tools/stats/export_test_times.py
|
|
||||||
|
|
||||||
assert_git_not_dirty
|
|
||||||
@ -1,33 +0,0 @@
#!/bin/bash

# Common prelude for macos-build.sh and macos-test.sh

# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

sysctl -a | grep machdep.cpu

# These are required for both the build job and the test job.
# In the latter they are used to test cpp extensions.
export MACOSX_DEPLOYMENT_TARGET=11.1
export CXX=clang++
export CC=clang

print_cmake_info() {
  CMAKE_EXEC=$(which cmake)
  echo "$CMAKE_EXEC"

  CONDA_INSTALLATION_DIR=$(dirname "$CMAKE_EXEC")
  # Print all libraries under cmake rpath for debugging
  ls -la "$CONDA_INSTALLATION_DIR/../lib"

  export CMAKE_EXEC
  # Explicitly add the conda env lib folder to the cmake rpath to address the flaky issue
  # where cmake dependencies couldn't be found. This seems to point to how conda
  # links $CMAKE_EXEC to its package cache when cloning a new environment
  install_name_tool -add_rpath @executable_path/../lib "${CMAKE_EXEC}" || true
  # Adding the rpath invalidates the cmake signature, so sign it again here to trust the
  # executable; otherwise it fails with EXC_BAD_ACCESS (SIGKILL (Code Signature Invalid))
  # and exit code 137
  codesign -f -s - "${CMAKE_EXEC}" || true
}
@ -1,169 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# shellcheck disable=SC2034
|
|
||||||
# shellcheck source=./macos-common.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
|
|
||||||
|
|
||||||
if [[ -n "$CONDA_ENV" ]]; then
|
|
||||||
# Use binaries under conda environment
|
|
||||||
export PATH="$CONDA_ENV/bin":$PATH
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Test that OpenMP is enabled for non-arm64 build
|
|
||||||
if [[ ${BUILD_ENVIRONMENT} != *arm64* ]]; then
|
|
||||||
pushd test
|
|
||||||
if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then
|
|
||||||
echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
popd
|
|
||||||
fi
|
|
||||||
|
|
||||||
setup_test_python() {
|
|
||||||
# The CircleCI worker hostname doesn't resolve to an address.
|
|
||||||
# This environment variable makes ProcessGroupGloo default to
|
|
||||||
# using the address associated with the loopback interface.
|
|
||||||
export GLOO_SOCKET_IFNAME=lo0
|
|
||||||
echo "Ninja version: $(ninja --version)"
|
|
||||||
echo "Python version: $(which python) ($(python --version))"
|
|
||||||
|
|
||||||
# Increase default limit on open file handles from 256 to 1024
|
|
||||||
ulimit -n 1024
|
|
||||||
}
|
|
||||||
|
|
||||||
test_python_all() {
|
|
||||||
setup_test_python
|
|
||||||
|
|
||||||
time python test/run_test.py --verbose --exclude-jit-executor
|
|
||||||
|
|
||||||
assert_git_not_dirty
|
|
||||||
}
|
|
||||||
|
|
||||||
test_python_shard() {
|
|
||||||
if [[ -z "$NUM_TEST_SHARDS" ]]; then
|
|
||||||
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
setup_test_python
|
|
||||||
|
|
||||||
time python test/run_test.py --verbose --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS"
|
|
||||||
|
|
||||||
assert_git_not_dirty
|
|
||||||
}
|
|
||||||
|
|
||||||
test_libtorch() {
|
|
||||||
# C++ API
|
|
||||||
|
|
||||||
if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then
|
|
||||||
# NB: Install outside of source directory (at the same level as the root
|
|
||||||
# pytorch folder) so that it doesn't get cleaned away prior to docker push.
|
|
||||||
# But still clean it before we perform our own build.
|
|
||||||
|
|
||||||
echo "Testing libtorch"
|
|
||||||
|
|
||||||
CPP_BUILD="$PWD/../cpp-build"
|
|
||||||
rm -rf "$CPP_BUILD"
|
|
||||||
mkdir -p "$CPP_BUILD"/caffe2
|
|
||||||
|
|
||||||
BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
|
|
||||||
pushd "$CPP_BUILD"/caffe2
|
|
||||||
VERBOSE=1 DEBUG=1 python "$BUILD_LIBTORCH_PY"
|
|
||||||
popd
|
|
||||||
|
|
||||||
MNIST_DIR="${PWD}/test/cpp/api/mnist"
|
|
||||||
python tools/download_mnist.py --quiet -d "${MNIST_DIR}"
|
|
||||||
|
|
||||||
# Unfortunately it seems like the test can't load from miniconda3
|
|
||||||
# without these paths being set
|
|
||||||
export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib"
|
|
||||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib"
|
|
||||||
TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" CPP_TESTS_DIR="${CPP_BUILD}/caffe2/bin" python test/run_test.py --cpp --verbose -i cpp/test_api
|
|
||||||
|
|
||||||
assert_git_not_dirty
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
test_custom_backend() {
|
|
||||||
print_cmake_info
|
|
||||||
|
|
||||||
echo "Testing custom backends"
|
|
||||||
pushd test/custom_backend
|
|
||||||
rm -rf build && mkdir build
|
|
||||||
pushd build
|
|
||||||
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
|
|
||||||
CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
|
|
||||||
make VERBOSE=1
|
|
||||||
popd
|
|
||||||
|
|
||||||
# Run Python tests and export a lowered module.
|
|
||||||
python test_custom_backend.py -v
|
|
||||||
python backend.py --export-module-to=model.pt
|
|
||||||
# Run C++ tests using the exported module.
|
|
||||||
build/test_custom_backend ./model.pt
|
|
||||||
rm -f ./model.pt
|
|
||||||
popd
|
|
||||||
assert_git_not_dirty
|
|
||||||
}
|
|
||||||
|
|
||||||
test_custom_script_ops() {
|
|
||||||
print_cmake_info
|
|
||||||
|
|
||||||
echo "Testing custom script operators"
|
|
||||||
pushd test/custom_operator
|
|
||||||
# Build the custom operator library.
|
|
||||||
rm -rf build && mkdir build
|
|
||||||
pushd build
|
|
||||||
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
|
|
||||||
CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
|
|
||||||
make VERBOSE=1
|
|
||||||
popd
|
|
||||||
|
|
||||||
# Run tests Python-side and export a script module.
|
|
||||||
python test_custom_ops.py -v
|
|
||||||
python model.py --export-script-module=model.pt
|
|
||||||
# Run tests C++-side and load the exported script module.
|
|
||||||
build/test_custom_ops ./model.pt
|
|
||||||
popd
|
|
||||||
assert_git_not_dirty
|
|
||||||
}
|
|
||||||
|
|
||||||
test_jit_hooks() {
|
|
||||||
print_cmake_info
|
|
||||||
|
|
||||||
echo "Testing jit hooks in cpp"
|
|
||||||
pushd test/jit_hooks
|
|
||||||
# Build the custom operator library.
|
|
||||||
rm -rf build && mkdir build
|
|
||||||
pushd build
|
|
||||||
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
|
|
||||||
CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" "${CMAKE_EXEC}" ..
|
|
||||||
make VERBOSE=1
|
|
||||||
popd
|
|
||||||
|
|
||||||
# Run tests Python-side and export a script module.
|
|
||||||
python model.py --export-script-module=model
|
|
||||||
# Run tests C++-side and load the exported script module.
|
|
||||||
build/test_jit_hooks ./model
|
|
||||||
popd
|
|
||||||
assert_git_not_dirty
|
|
||||||
}
|
|
||||||
|
|
||||||
install_tlparse
|
|
||||||
|
|
||||||
if [[ $NUM_TEST_SHARDS -gt 1 ]]; then
|
|
||||||
test_python_shard "${SHARD_NUMBER}"
|
|
||||||
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
|
||||||
test_libtorch
|
|
||||||
test_custom_script_ops
|
|
||||||
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
|
|
||||||
test_jit_hooks
|
|
||||||
test_custom_backend
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
test_python_all
|
|
||||||
test_libtorch
|
|
||||||
test_custom_script_ops
|
|
||||||
test_jit_hooks
|
|
||||||
test_custom_backend
|
|
||||||
fi
|
|
||||||
@ -1,55 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Required environment variable: $BUILD_ENVIRONMENT
|
|
||||||
# (This is set by default in the Docker images we build, so you don't
|
|
||||||
# need to set it yourself.)
|
|
||||||
|
|
||||||
# shellcheck source=./common.sh
|
|
||||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
|
||||||
|
|
||||||
echo "Testing pytorch"
|
|
||||||
time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose
|
|
||||||
|
|
||||||
# Disabling these tests to see if that resolves the timeout issues; see https://github.com/pytorch/pytorch/issues/70015
|
|
||||||
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
|
|
||||||
# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_c10d_common
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_c10d_gloo
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_store
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
|
|
||||||
time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
|
|
||||||
# FSDP tests
|
|
||||||
for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
|
|
||||||
# ShardedTensor tests
|
|
||||||
time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
|
|
||||||
time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
|
|
||||||
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
|
|
||||||
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
|
|
||||||
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
|
|
||||||
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
|
|
||||||
|
|
||||||
# functional collective tests
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_functional_api
|
|
||||||
|
|
||||||
# DTensor tests
|
|
||||||
time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops
|
|
||||||
time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compile
|
|
||||||
|
|
||||||
# DeviceMesh test
|
|
||||||
time python test/run_test.py --verbose -i distributed/test_device_mesh
|
|
||||||
|
|
||||||
# DTensor/TP tests
|
|
||||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_ddp_2d_parallel
|
|
||||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_fsdp_2d_parallel
|
|
||||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
|
|
||||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state.py
|
|
||||||
|
|
||||||
# Other tests
|
|
||||||
time python test/run_test.py --verbose -i test_cuda_primary_ctx
|
|
||||||
time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
|
|
||||||
time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
|
|
||||||
time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
|
|
||||||
assert_git_not_dirty
|
|
||||||
@ -1,90 +0,0 @@
|
|||||||
import argparse
|
|
||||||
import json
|
|
||||||
import math
|
|
||||||
import sys
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument(
|
|
||||||
"--test-name", dest="test_name", action="store", required=True, help="test name"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--sample-stats",
|
|
||||||
dest="sample_stats",
|
|
||||||
action="store",
|
|
||||||
required=True,
|
|
||||||
help="stats from sample",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--update",
|
|
||||||
action="store_true",
|
|
||||||
help="whether to update baseline using stats from sample",
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
test_name = args.test_name
|
|
||||||
|
|
||||||
if "cpu" in test_name:
|
|
||||||
backend = "cpu"
|
|
||||||
elif "gpu" in test_name:
|
|
||||||
backend = "gpu"
|
|
||||||
|
|
||||||
data_file_path = f"../{backend}_runtime.json"
|
|
||||||
|
|
||||||
with open(data_file_path) as data_file:
|
|
||||||
data = json.load(data_file)
|
|
||||||
|
|
||||||
if test_name in data:
|
|
||||||
mean = float(data[test_name]["mean"])
|
|
||||||
sigma = float(data[test_name]["sigma"])
|
|
||||||
else:
|
|
||||||
# Let the test pass if baseline number doesn't exist
|
|
||||||
mean = sys.maxsize
|
|
||||||
sigma = 0.001
|
|
||||||
|
|
||||||
print("population mean: ", mean)
|
|
||||||
print("population sigma: ", sigma)
|
|
||||||
|
|
||||||
# Let the test pass if baseline number is NaN (which happened in
|
|
||||||
# the past when we didn't have logic for catching NaN numbers)
|
|
||||||
if math.isnan(mean) or math.isnan(sigma):
|
|
||||||
mean = sys.maxsize
|
|
||||||
sigma = 0.001
|
|
||||||
|
|
||||||
sample_stats_data = json.loads(args.sample_stats)
|
|
||||||
|
|
||||||
sample_mean = float(sample_stats_data["mean"])
|
|
||||||
sample_sigma = float(sample_stats_data["sigma"])
|
|
||||||
|
|
||||||
print("sample mean: ", sample_mean)
|
|
||||||
print("sample sigma: ", sample_sigma)
|
|
||||||
|
|
||||||
if math.isnan(sample_mean):
|
|
||||||
raise Exception("""Error: sample mean is NaN""")
|
|
||||||
elif math.isnan(sample_sigma):
|
|
||||||
raise Exception("""Error: sample sigma is NaN""")
|
|
||||||
|
|
||||||
z_value = (sample_mean - mean) / sigma
|
|
||||||
|
|
||||||
print("z-value: ", z_value)
|
|
||||||
|
|
||||||
if z_value >= 3:
|
|
||||||
raise Exception(
|
|
||||||
f"""\n
|
|
||||||
z-value >= 3, there is a high chance of a perf regression.\n
|
|
||||||
To reproduce this regression, run
|
|
||||||
`cd .ci/pytorch/perf_test/ && bash {test_name}.sh` on your local machine
|
|
||||||
and compare the runtime before/after your code change.
|
|
||||||
"""
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
print("z-value < 3, no perf regression detected.")
|
|
||||||
if args.update:
|
|
||||||
print("We will use these numbers as new baseline.")
|
|
||||||
new_data_file_path = f"../new_{backend}_runtime.json"
|
|
||||||
with open(new_data_file_path) as new_data_file:
|
|
||||||
new_data = json.load(new_data_file)
|
|
||||||
new_data[test_name] = {}
|
|
||||||
new_data[test_name]["mean"] = sample_mean
|
|
||||||
new_data[test_name]["sigma"] = max(sample_sigma, sample_mean * 0.1)
|
|
||||||
with open(new_data_file_path, "w") as new_data_file:
|
|
||||||
json.dump(new_data, new_data_file, indent=4)
|
|
||||||
@ -1,13 +0,0 @@
import json
import sys

data_file_path = sys.argv[1]
commit_hash = sys.argv[2]

with open(data_file_path) as data_file:
    data = json.load(data_file)

data["commit"] = commit_hash

with open(data_file_path, "w") as data_file:
    json.dump(data, data_file)
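
# Usage sketch (argument values are illustrative; the perf scripts elsewhere read files
# such as ../cpu_runtime.json): python <this script> ../cpu_runtime.json "$(git rev-parse HEAD)"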
@ -1,17 +0,0 @@
import sys

log_file_path = sys.argv[1]

with open(log_file_path) as f:
    lines = f.readlines()

for line in lines:
    # Ignore errors from CPU instruction set checks, symbol-existence tests,
    # or compilation error formatting tests
    ignored_keywords = [
        "src.c",
        "CheckSymbolExists.c",
        "test_compilation_error_formatting",
    ]
    if all(keyword not in line for keyword in ignored_keywords):
        print(line)
1250  .ci/pytorch/test.sh
File diff suppressed because it is too large
@ -1,47 +0,0 @@
#!/bin/bash

# If you want to rebuild, run this with REBUILD=1
# If you want to build with CUDA, run this with USE_CUDA=1
# If you want to build without CUDA, run this with USE_CUDA=0

if [ ! -f setup.py ]; then
  echo "ERROR: Please run this build script from the PyTorch root directory."
  exit 1
fi

SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# shellcheck source=./common.sh
source "$SCRIPT_PARENT_DIR/common.sh"
# shellcheck source=./common-build.sh
source "$SCRIPT_PARENT_DIR/common-build.sh"

export TMP_DIR="${PWD}/build/win_tmp"
TMP_DIR_WIN=$(cygpath -w "${TMP_DIR}")
export TMP_DIR_WIN
export PYTORCH_FINAL_PACKAGE_DIR=${PYTORCH_FINAL_PACKAGE_DIR:-/c/w/build-results}
if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
  mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
fi

export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers

set +ex
grep -E -R 'PyLong_(From|As)(Unsigned|)Long\(' --exclude=python_numbers.h --exclude=eval_frame.c torch/
PYLONG_API_CHECK=$?
if [[ $PYLONG_API_CHECK == 0 ]]; then
  echo "Usage of PyLong_{From,As}{Unsigned}Long API may lead to overflow errors on Windows"
  echo "because \`sizeof(long) == 4\` and \`sizeof(unsigned long) == 4\`."
  echo "Please include \"torch/csrc/utils/python_numbers.h\" and use the corresponding APIs instead."
  echo "PyLong_FromLong -> THPUtils_packInt32 / THPUtils_packInt64"
  echo "PyLong_AsLong -> THPUtils_unpackInt (32-bit) / THPUtils_unpackLong (64-bit)"
  echo "PyLong_FromUnsignedLong -> THPUtils_packUInt32 / THPUtils_packUInt64"
  echo "PyLong_AsUnsignedLong -> THPUtils_unpackUInt32 / THPUtils_unpackUInt64"
  exit 1
fi
set -ex
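
# For illustration (hypothetical call site, not from the codebase): a line such as
#   PyLong_FromLong(storage_offset)
# in torch/csrc would trip the check above and should instead be written as
#   THPUtils_packInt64(storage_offset)
# (or THPUtils_packInt32 for values known to fit in 32 bits).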

"$SCRIPT_HELPERS_DIR"/build_pytorch.bat

assert_git_not_dirty

echo "BUILD PASSED"
@ -1,137 +0,0 @@
|
|||||||
if "%DEBUG%" == "1" (
|
|
||||||
set BUILD_TYPE=debug
|
|
||||||
) ELSE (
|
|
||||||
set BUILD_TYPE=release
|
|
||||||
)
|
|
||||||
|
|
||||||
set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
|
|
||||||
|
|
||||||
:: This inflates our log size slightly, but it is REALLY useful to be
|
|
||||||
:: able to see what our cl.exe commands are (since you can actually
|
|
||||||
:: just copy-paste them into a local Windows setup to just rebuild a
|
|
||||||
:: single file.)
|
|
||||||
:: log sizes are too long, but leaving this here in case someone wants to use it locally
|
|
||||||
:: set CMAKE_VERBOSE_MAKEFILE=1
|
|
||||||
|
|
||||||
|
|
||||||
set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
|
|
||||||
|
|
||||||
call %INSTALLER_DIR%\install_magma.bat
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
|
|
||||||
call %INSTALLER_DIR%\install_sccache.bat
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
|
|
||||||
:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
|
|
||||||
:: We just need to activate it here
|
|
||||||
call %INSTALLER_DIR%\activate_miniconda3.bat
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
|
|
||||||
call pip install mkl-include==2021.4.0 mkl-devel==2021.4.0
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
|
|
||||||
:: Override VS env here
|
|
||||||
pushd .
|
|
||||||
if "%VC_VERSION%" == "" (
|
|
||||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
|
|
||||||
) else (
|
|
||||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
|
|
||||||
)
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
@echo on
|
|
||||||
popd
|
|
||||||
|
|
||||||
if not "%USE_CUDA%"=="1" goto cuda_build_end
|
|
||||||
|
|
||||||
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
|
|
||||||
|
|
||||||
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
|
|
||||||
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
|
|
||||||
exit /b 1
|
|
||||||
)
|
|
||||||
rem version transformer, for example 10.1 to 10_1.
|
|
||||||
if x%CUDA_VERSION:.=%==x%CUDA_VERSION% (
|
|
||||||
echo CUDA version %CUDA_VERSION% format isn't correct, which doesn't contain '.'
|
|
||||||
exit /b 1
|
|
||||||
)
|
|
||||||
set VERSION_SUFFIX=%CUDA_VERSION:.=_%
|
|
||||||
set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
|
|
||||||
|
|
||||||
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
|
|
||||||
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
|
||||||
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
|
||||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
|
||||||
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
|
|
||||||
|
|
||||||
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
|
|
||||||
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
|
||||||
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
|
||||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
|
||||||
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
|
|
||||||
|
|
||||||
:cuda_build_end
|
|
||||||
|
|
||||||
set DISTUTILS_USE_SDK=1
|
|
||||||
set PATH=%TMP_DIR_WIN%\bin;%PATH%
|
|
||||||
|
|
||||||
:: The latest Windows CUDA test is running on AWS G5 runner with A10G GPU
|
|
||||||
if "%TORCH_CUDA_ARCH_LIST%" == "" set TORCH_CUDA_ARCH_LIST=8.6
|
|
||||||
|
|
||||||
:: The default sccache idle timeout is 600, which is too short and leads to intermittent build errors.
|
|
||||||
set SCCACHE_IDLE_TIMEOUT=0
|
|
||||||
set SCCACHE_IGNORE_SERVER_IO_ERROR=1
|
|
||||||
sccache --stop-server
|
|
||||||
sccache --start-server
|
|
||||||
sccache --zero-stats
|
|
||||||
set CMAKE_C_COMPILER_LAUNCHER=sccache
|
|
||||||
set CMAKE_CXX_COMPILER_LAUNCHER=sccache
|
|
||||||
|
|
||||||
set CMAKE_GENERATOR=Ninja
|
|
||||||
|
|
||||||
if "%USE_CUDA%"=="1" (
|
|
||||||
:: randomtemp is used to resolve the intermittent build error related to CUDA.
|
|
||||||
:: code: https://github.com/peterjc123/randomtemp-rust
|
|
||||||
:: issue: https://github.com/pytorch/pytorch/issues/25393
|
|
||||||
::
|
|
||||||
:: CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
|
|
||||||
:: randomtemp.exe and sccache.exe into a batch file which CMake invokes.
|
|
||||||
curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %TMP_DIR_WIN%\bin\randomtemp.exe
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
echo @"%TMP_DIR_WIN%\bin\randomtemp.exe" "%TMP_DIR_WIN%\bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%TMP_DIR%/bin/nvcc.bat"
|
|
||||||
cat %TMP_DIR%/bin/nvcc.bat
|
|
||||||
set CUDA_NVCC_EXECUTABLE=%TMP_DIR%/bin/nvcc.bat
|
|
||||||
for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
|
|
||||||
set CMAKE_CUDA_COMPILER_LAUNCHER=%TMP_DIR%/bin/randomtemp.exe;%TMP_DIR%\bin\sccache.exe
|
|
||||||
)
|
|
||||||
|
|
||||||
:: Print all existing environment variable for debugging
|
|
||||||
set
|
|
||||||
|
|
||||||
python setup.py bdist_wheel
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
sccache --show-stats
|
|
||||||
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
|
|
||||||
(
|
|
||||||
if "%BUILD_ENVIRONMENT%"=="" (
|
|
||||||
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
|
|
||||||
) else (
|
|
||||||
copy /Y "dist\*.whl" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
|
||||||
|
|
||||||
:: export test times so that potential sharded tests that'll branch off this build will use consistent data
|
|
||||||
python tools/stats/export_test_times.py
|
|
||||||
robocopy /E ".additional_ci_files" "%PYTORCH_FINAL_PACKAGE_DIR%\.additional_ci_files"
|
|
||||||
|
|
||||||
:: Also save build/.ninja_log as an artifact
|
|
||||||
copy /Y "build\.ninja_log" "%PYTORCH_FINAL_PACKAGE_DIR%\"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
sccache --show-stats --stats-format json | jq .stats > sccache-stats-%BUILD_ENVIRONMENT%-%OUR_GITHUB_JOB_ID%.json
|
|
||||||
sccache --stop-server
|
|
||||||
@ -1,26 +0,0 @@
|
|||||||
if "%BUILD_ENVIRONMENT%"=="" (
|
|
||||||
set CONDA_PARENT_DIR=%CD%
|
|
||||||
) else (
|
|
||||||
set CONDA_PARENT_DIR=C:\Jenkins
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
:: Be conservative here when rolling out the new AMI with conda. This will try
|
|
||||||
:: to install conda as before if it couldn't find the conda installation. This
|
|
||||||
:: can be removed eventually after we gain enough confidence in the AMI
|
|
||||||
if not exist %CONDA_PARENT_DIR%\Miniconda3 (
|
|
||||||
set INSTALL_FRESH_CONDA=1
|
|
||||||
)
|
|
||||||
|
|
||||||
if "%INSTALL_FRESH_CONDA%"=="1" (
|
|
||||||
curl --retry 3 --retry-all-errors -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
|
|
||||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
)
|
|
||||||
|
|
||||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
|
||||||
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
|
|
||||||
@ -1,13 +0,0 @@
mkdir %TMP_DIR_WIN%\bin

if "%REBUILD%"=="" (
  IF EXIST %TMP_DIR_WIN%\bin\sccache.exe (
    taskkill /im sccache.exe /f /t || ver > nul
    del %TMP_DIR_WIN%\bin\sccache.exe || ver > nul
  )
  if "%BUILD_ENVIRONMENT%"=="" (
    curl --retry 3 --retry-all-errors -k https://s3.amazonaws.com/ossci-windows/sccache-v0.7.4.exe --output %TMP_DIR_WIN%\bin\sccache.exe
  ) else (
    aws s3 cp s3://ossci-windows/sccache-v0.7.4.exe %TMP_DIR_WIN%\bin\sccache.exe
  )
)
@ -1,54 +0,0 @@
|
|||||||
set PATH=C:\Program Files\CMake\bin;C:\Program Files\7-Zip;C:\ProgramData\chocolatey\bin;C:\Program Files\Git\cmd;C:\Program Files\Amazon\AWSCLI;C:\Program Files\Amazon\AWSCLI\bin;%PATH%
|
|
||||||
|
|
||||||
:: Install Miniconda3
|
|
||||||
set INSTALLER_DIR=%SCRIPT_HELPERS_DIR%\installation-helpers
|
|
||||||
|
|
||||||
:: Miniconda has been installed as part of the Windows AMI with all the dependencies.
|
|
||||||
:: We just need to activate it here
|
|
||||||
call %INSTALLER_DIR%\activate_miniconda3.bat
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
|
|
||||||
:: PyTorch is now installed using the standard wheel on Windows into the conda environment.
|
|
||||||
:: However, the test scripts still frequently refer to the workspace temp directory
:: build\torch. Rather than changing all these references, making a copy of the torch folder
|
|
||||||
:: from conda to the current workspace is easier. The workspace will be cleaned up after
|
|
||||||
:: the job anyway
|
|
||||||
xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
|
||||||
|
|
||||||
pushd .
|
|
||||||
if "%VC_VERSION%" == "" (
|
|
||||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64
|
|
||||||
) else (
|
|
||||||
call "C:\Program Files (x86)\Microsoft Visual Studio\%VC_YEAR%\%VC_PRODUCT%\VC\Auxiliary\Build\vcvarsall.bat" x64 -vcvars_ver=%VC_VERSION%
|
|
||||||
)
|
|
||||||
if errorlevel 1 exit /b
|
|
||||||
if not errorlevel 0 exit /b
|
|
||||||
@echo on
|
|
||||||
popd
|
|
||||||
|
|
||||||
set DISTUTILS_USE_SDK=1
|
|
||||||
|
|
||||||
if not "%USE_CUDA%"=="1" goto cuda_build_end
|
|
||||||
|
|
||||||
set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION%
|
|
||||||
|
|
||||||
rem version transformer, for example 10.1 to 10_1.
|
|
||||||
set VERSION_SUFFIX=%CUDA_VERSION:.=_%
|
|
||||||
set CUDA_PATH_V%VERSION_SUFFIX%=%CUDA_PATH%
|
|
||||||
|
|
||||||
set CUDNN_LIB_DIR=%CUDA_PATH%\lib\x64
|
|
||||||
set CUDA_TOOLKIT_ROOT_DIR=%CUDA_PATH%
|
|
||||||
set CUDNN_ROOT_DIR=%CUDA_PATH%
|
|
||||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
|
||||||
set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%
|
|
||||||
set NUMBAPRO_CUDALIB=%CUDA_PATH%\bin
|
|
||||||
set NUMBAPRO_LIBDEVICE=%CUDA_PATH%\nvvm\libdevice
|
|
||||||
set NUMBAPRO_NVVM=%CUDA_PATH%\nvvm\bin\nvvm64_32_0.dll
|
|
||||||
|
|
||||||
:cuda_build_end
|
|
||||||
|
|
||||||
set PYTHONPATH=%TMP_DIR_WIN%\build;%PYTHONPATH%
|
|
||||||
|
|
||||||
:: Print all existing environment variable for debugging
|
|
||||||
set
|
|
||||||
@ -1,54 +0,0 @@
|
|||||||
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat
|
|
||||||
if errorlevel 1 exit /b 1
|
|
||||||
|
|
||||||
:: Save the current working directory so that we can go back there
|
|
||||||
set CWD=%cd%
|
|
||||||
|
|
||||||
set CPP_TESTS_DIR=%TMP_DIR_WIN%\build\torch\bin
|
|
||||||
set PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64;%TMP_DIR_WIN%\build\torch\lib;%PATH%
|
|
||||||
|
|
||||||
set TORCH_CPP_TEST_MNIST_PATH=%CWD%\test\cpp\api\mnist
|
|
||||||
python tools\download_mnist.py --quiet -d %TORCH_CPP_TEST_MNIST_PATH%
|
|
||||||
|
|
||||||
python test\run_test.py --cpp --verbose -i cpp/test_api
|
|
||||||
if errorlevel 1 exit /b 1
|
|
||||||
if not errorlevel 0 exit /b 1
|
|
||||||
|
|
||||||
cd %TMP_DIR_WIN%\build\torch\test
|
|
||||||
for /r "." %%a in (*.exe) do (
|
|
||||||
call :libtorch_check "%%~na" "%%~fa"
|
|
||||||
if errorlevel 1 goto fail
|
|
||||||
)
|
|
||||||
|
|
||||||
goto :eof
|
|
||||||
|
|
||||||
:libtorch_check
|
|
||||||
|
|
||||||
cd %CWD%
|
|
||||||
set CPP_TESTS_DIR=%TMP_DIR_WIN%\build\torch\test
|
|
||||||
|
|
||||||
:: Skip verify_api_visibility as it is a compile-level test
|
|
||||||
if "%~1" == "verify_api_visibility" goto :eof
|
|
||||||
|
|
||||||
echo Running "%~2"
|
|
||||||
if "%~1" == "c10_intrusive_ptr_benchmark" (
|
|
||||||
:: NB: This is not a gtest executable file, so it can't be handled by pytest-cpp
|
|
||||||
call "%~2"
|
|
||||||
goto :eof
|
|
||||||
)
|
|
||||||
|
|
||||||
python test\run_test.py --cpp --verbose -i "cpp/%~1"
|
|
||||||
if errorlevel 1 (
|
|
||||||
echo %1 failed with exit code %errorlevel%
|
|
||||||
goto fail
|
|
||||||
)
|
|
||||||
if not errorlevel 0 (
|
|
||||||
echo %1 failed with exit code %errorlevel%
|
|
||||||
goto fail
|
|
||||||
)
|
|
||||||
|
|
||||||
:eof
|
|
||||||
exit /b 0
|
|
||||||
|
|
||||||
:fail
|
|
||||||
exit /b 1
|
|
||||||
@ -1,12 +0,0 @@
call %SCRIPT_HELPERS_DIR%\setup_pytorch_env.bat

echo Copying over test times file
robocopy /E "%PYTORCH_FINAL_PACKAGE_DIR_WIN%\.additional_ci_files" "%PROJECT_DIR_WIN%\.additional_ci_files"

pushd test

echo Run jit_profiling tests
python run_test.py --include test_jit_legacy test_jit_fuser_legacy --verbose
if ERRORLEVEL 1 exit /b 1

popd
@ -1,73 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -ex
|
|
||||||
|
|
||||||
SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
|
|
||||||
# shellcheck source=./common.sh
|
|
||||||
source "$SCRIPT_PARENT_DIR/common.sh"
|
|
||||||
|
|
||||||
export TMP_DIR="${PWD}/build/win_tmp"
|
|
||||||
TMP_DIR_WIN=$(cygpath -w "${TMP_DIR}")
|
|
||||||
export TMP_DIR_WIN
|
|
||||||
export PROJECT_DIR="${PWD}"
|
|
||||||
PROJECT_DIR_WIN=$(cygpath -w "${PROJECT_DIR}")
|
|
||||||
export PROJECT_DIR_WIN
|
|
||||||
export TEST_DIR="${PWD}/test"
|
|
||||||
TEST_DIR_WIN=$(cygpath -w "${TEST_DIR}")
|
|
||||||
export TEST_DIR_WIN
|
|
||||||
export PYTORCH_FINAL_PACKAGE_DIR="${PYTORCH_FINAL_PACKAGE_DIR:-/c/w/build-results}"
|
|
||||||
PYTORCH_FINAL_PACKAGE_DIR_WIN=$(cygpath -w "${PYTORCH_FINAL_PACKAGE_DIR}")
|
|
||||||
export PYTORCH_FINAL_PACKAGE_DIR_WIN
|
|
||||||
|
|
||||||
mkdir -p "$TMP_DIR"/build/torch
|
|
||||||
|
|
||||||
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
|
|
||||||
|
|
||||||
if [[ "$TEST_CONFIG" = "force_on_cpu" ]]; then
|
|
||||||
# run the full test suite for force_on_cpu test
|
|
||||||
export USE_CUDA=0
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
|
||||||
# Used so that only cuda/rocm specific versions of tests are generated
|
|
||||||
# mainly used so that we're not spending extra cycles testing cpu
|
|
||||||
# devices on expensive gpu machines
|
|
||||||
export PYTORCH_TESTING_DEVICE_ONLY_FOR="cuda"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# TODO: Move both of them to Windows AMI
|
|
||||||
python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0
|
|
||||||
|
|
||||||
# Install Z3 optional dependency for Windows builds.
|
|
||||||
python -m pip install z3-solver==4.12.2.0
|
|
||||||
|
|
||||||
run_tests() {
|
|
||||||
# Run nvidia-smi if available
|
|
||||||
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
|
|
||||||
if [[ -x "$path" ]]; then
|
|
||||||
"$path" || echo "true";
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
if [[ $NUM_TEST_SHARDS -eq 1 ]]; then
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_python_shard.bat
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_custom_script_ops.bat
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_custom_backend.bat
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_libtorch.bat
|
|
||||||
else
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_python_shard.bat
|
|
||||||
if [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_libtorch.bat
|
|
||||||
if [[ "${USE_CUDA}" == "1" ]]; then
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_python_jit_legacy.bat
|
|
||||||
fi
|
|
||||||
elif [[ "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_custom_backend.bat
|
|
||||||
"$SCRIPT_HELPERS_DIR"/test_custom_script_ops.bat
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
run_tests
|
|
||||||
assert_git_not_dirty
|
|
||||||
echo "TEST PASSED"
|
|
||||||
@ -1,4 +0,0 @@
Warning
=======

PyTorch's migration from CircleCI to GitHub Actions has been completed. All continuous integration and deployment workflows are defined in the `.github/workflows` folder.
171  .circleci/cimodel/data/binary_build_data.py  Normal file
@ -0,0 +1,171 @@
|
|||||||
|
"""
|
||||||
|
This module models the tree of configuration variants
|
||||||
|
for "smoketest" builds.
|
||||||
|
|
||||||
|
Each subclass of ConfigNode represents a layer of the configuration hierarchy.
|
||||||
|
These tree nodes encapsulate the logic for whether a branch of the hierarchy
|
||||||
|
should be "pruned".
|
||||||
|
"""
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
from cimodel.lib.conf_tree import ConfigNode
|
||||||
|
import cimodel.data.dimensions as dimensions
|
||||||
|
|
||||||
|
|
||||||
|
LINKING_DIMENSIONS = [
|
||||||
|
"shared",
|
||||||
|
"static",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
DEPS_INCLUSION_DIMENSIONS = [
|
||||||
|
"with-deps",
|
||||||
|
"without-deps",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_processor_arch_name(gpu_version):
|
||||||
|
return "cpu" if not gpu_version else (
|
||||||
|
"cu" + gpu_version.strip("cuda") if gpu_version.startswith("cuda") else gpu_version
|
||||||
|
)
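
# Illustrative values for the mapping above: None -> "cpu", "cuda102" -> "cu102",
# and ROCm strings such as "rocm4.2" pass through unchanged.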
|
||||||
|
|
||||||
|
CONFIG_TREE_DATA = OrderedDict(
|
||||||
|
)
|
||||||
|
|
||||||
|
# GCC config variants:
|
||||||
|
#
|
||||||
|
# All the nightlies (except libtorch with new gcc ABI) are built with devtoolset7,
|
||||||
|
# which can only build with old gcc ABI. It is better than devtoolset3
|
||||||
|
# because it understands avx512, which is needed for good fbgemm performance.
|
||||||
|
#
|
||||||
|
# Libtorch with new gcc ABI is built with gcc 5.4 on Ubuntu 16.04.
|
||||||
|
LINUX_GCC_CONFIG_VARIANTS = OrderedDict(
|
||||||
|
manywheel=['devtoolset7'],
|
||||||
|
conda=['devtoolset7'],
|
||||||
|
libtorch=[
|
||||||
|
"devtoolset7",
|
||||||
|
"gcc5.4_cxx11-abi",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
WINDOWS_LIBTORCH_CONFIG_VARIANTS = [
|
||||||
|
"debug",
|
||||||
|
"release",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class TopLevelNode(ConfigNode):
|
||||||
|
def __init__(self, node_name, config_tree_data, smoke):
|
||||||
|
super(TopLevelNode, self).__init__(None, node_name)
|
||||||
|
|
||||||
|
self.config_tree_data = config_tree_data
|
||||||
|
self.props["smoke"] = smoke
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
return [OSConfigNode(self, x, c, p) for (x, (c, p)) in self.config_tree_data.items()]
|
||||||
|
|
||||||
|
|
||||||
|
class OSConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, os_name, gpu_versions, py_tree):
|
||||||
|
super(OSConfigNode, self).__init__(parent, os_name)
|
||||||
|
|
||||||
|
self.py_tree = py_tree
|
||||||
|
self.props["os_name"] = os_name
|
||||||
|
self.props["gpu_versions"] = gpu_versions
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
return [PackageFormatConfigNode(self, k, v) for k, v in self.py_tree.items()]
|
||||||
|
|
||||||
|
|
||||||
|
class PackageFormatConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, package_format, python_versions):
|
||||||
|
super(PackageFormatConfigNode, self).__init__(parent, package_format)
|
||||||
|
|
||||||
|
self.props["python_versions"] = python_versions
|
||||||
|
self.props["package_format"] = package_format
|
||||||
|
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
if self.find_prop("os_name") == "linux":
|
||||||
|
return [LinuxGccConfigNode(self, v) for v in LINUX_GCC_CONFIG_VARIANTS[self.find_prop("package_format")]]
|
||||||
|
elif self.find_prop("os_name") == "windows" and self.find_prop("package_format") == "libtorch":
|
||||||
|
return [WindowsLibtorchConfigNode(self, v) for v in WINDOWS_LIBTORCH_CONFIG_VARIANTS]
|
||||||
|
else:
|
||||||
|
return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
|
||||||
|
|
||||||
|
|
||||||
|
class LinuxGccConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, gcc_config_variant):
|
||||||
|
super(LinuxGccConfigNode, self).__init__(parent, "GCC_CONFIG_VARIANT=" + str(gcc_config_variant))
|
||||||
|
|
||||||
|
self.props["gcc_config_variant"] = gcc_config_variant
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
gpu_versions = self.find_prop("gpu_versions")
|
||||||
|
|
||||||
|
# XXX devtoolset7 on CUDA 9.0 is temporarily disabled
|
||||||
|
# see https://github.com/pytorch/pytorch/issues/20066
|
||||||
|
if self.find_prop("gcc_config_variant") == 'devtoolset7':
|
||||||
|
gpu_versions = filter(lambda x: x != "cuda_90", gpu_versions)
|
||||||
|
|
||||||
|
# XXX disabling conda rocm build since docker images are not there
|
||||||
|
if self.find_prop("package_format") == 'conda':
|
||||||
|
gpu_versions = filter(lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions)
|
||||||
|
|
||||||
|
# XXX libtorch rocm build is temporarily disabled
|
||||||
|
if self.find_prop("package_format") == 'libtorch':
|
||||||
|
gpu_versions = filter(lambda x: x not in dimensions.ROCM_VERSION_LABELS, gpu_versions)
|
||||||
|
|
||||||
|
return [ArchConfigNode(self, v) for v in gpu_versions]
|
||||||
|
|
||||||
|
|
||||||
|
class WindowsLibtorchConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, libtorch_config_variant):
|
||||||
|
super(WindowsLibtorchConfigNode, self).__init__(parent, "LIBTORCH_CONFIG_VARIANT=" + str(libtorch_config_variant))
|
||||||
|
|
||||||
|
self.props["libtorch_config_variant"] = libtorch_config_variant
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
return [ArchConfigNode(self, v) for v in self.find_prop("gpu_versions")]
|
||||||
|
|
||||||
|
|
||||||
|
class ArchConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, gpu):
|
||||||
|
super(ArchConfigNode, self).__init__(parent, get_processor_arch_name(gpu))
|
||||||
|
|
||||||
|
self.props["gpu"] = gpu
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
return [PyVersionConfigNode(self, v) for v in self.find_prop("python_versions")]
|
||||||
|
|
||||||
|
|
||||||
|
class PyVersionConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, pyver):
|
||||||
|
super(PyVersionConfigNode, self).__init__(parent, pyver)
|
||||||
|
|
||||||
|
self.props["pyver"] = pyver
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
package_format = self.find_prop("package_format")
|
||||||
|
os_name = self.find_prop("os_name")
|
||||||
|
|
||||||
|
has_libtorch_variants = package_format == "libtorch" and os_name == "linux"
|
||||||
|
linking_variants = LINKING_DIMENSIONS if has_libtorch_variants else []
|
||||||
|
|
||||||
|
return [LinkingVariantConfigNode(self, v) for v in linking_variants]
|
||||||
|
|
||||||
|
|
||||||
|
class LinkingVariantConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, linking_variant):
|
||||||
|
super(LinkingVariantConfigNode, self).__init__(parent, linking_variant)
|
||||||
|
|
||||||
|
def get_children(self):
|
||||||
|
return [DependencyInclusionConfigNode(self, v) for v in DEPS_INCLUSION_DIMENSIONS]
|
||||||
|
|
||||||
|
|
||||||
|
class DependencyInclusionConfigNode(ConfigNode):
|
||||||
|
def __init__(self, parent, deps_variant):
|
||||||
|
super(DependencyInclusionConfigNode, self).__init__(parent, deps_variant)
|
||||||
|
|
||||||
|
self.props["libtorch_variant"] = "-".join([self.parent.get_label(), self.get_label()])
|
||||||
243  .circleci/cimodel/data/binary_build_definitions.py  Normal file
@ -0,0 +1,243 @@
|
|||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import cimodel.data.simple.util.branch_filters as branch_filters
|
||||||
|
import cimodel.data.binary_build_data as binary_build_data
|
||||||
|
import cimodel.lib.conf_tree as conf_tree
|
||||||
|
import cimodel.lib.miniutils as miniutils
|
||||||
|
|
||||||
|
class Conf(object):
|
||||||
|
def __init__(self, os, gpu_version, pydistro, parms, smoke, libtorch_variant, gcc_config_variant, libtorch_config_variant):
|
||||||
|
|
||||||
|
self.os = os
|
||||||
|
self.gpu_version = gpu_version
|
||||||
|
self.pydistro = pydistro
|
||||||
|
self.parms = parms
|
||||||
|
self.smoke = smoke
|
||||||
|
self.libtorch_variant = libtorch_variant
|
||||||
|
self.gcc_config_variant = gcc_config_variant
|
||||||
|
self.libtorch_config_variant = libtorch_config_variant
|
||||||
|
|
||||||
|
def gen_build_env_parms(self):
|
||||||
|
elems = [self.pydistro] + self.parms + [binary_build_data.get_processor_arch_name(self.gpu_version)]
|
||||||
|
if self.gcc_config_variant is not None:
|
||||||
|
elems.append(str(self.gcc_config_variant))
|
||||||
|
if self.libtorch_config_variant is not None:
|
||||||
|
elems.append(str(self.libtorch_config_variant))
|
||||||
|
return elems
|
||||||
|
|
||||||
|
def gen_docker_image(self):
|
||||||
|
if self.gcc_config_variant == 'gcc5.4_cxx11-abi':
|
||||||
|
if self.gpu_version is None:
|
||||||
|
return miniutils.quote("pytorch/libtorch-cxx11-builder:cpu")
|
||||||
|
else:
|
||||||
|
return miniutils.quote(
|
||||||
|
f"pytorch/libtorch-cxx11-builder:{self.gpu_version}"
|
||||||
|
)
|
||||||
|
if self.pydistro == "conda":
|
||||||
|
if self.gpu_version is None:
|
||||||
|
return miniutils.quote("pytorch/conda-builder:cpu")
|
||||||
|
else:
|
||||||
|
return miniutils.quote(
|
||||||
|
f"pytorch/conda-builder:{self.gpu_version}"
|
||||||
|
)
|
||||||
|
|
||||||
|
docker_word_substitution = {
|
||||||
|
"manywheel": "manylinux",
|
||||||
|
"libtorch": "manylinux",
|
||||||
|
}
|
||||||
|
|
||||||
|
docker_distro_prefix = miniutils.override(self.pydistro, docker_word_substitution)
|
||||||
|
|
||||||
|
# The cpu nightlies are built on the pytorch/manylinux-cuda102 docker image
|
||||||
|
# TODO cuda images should consolidate into tag-base images similar to rocm
|
||||||
|
alt_docker_suffix = "cuda102" if not self.gpu_version else (
|
||||||
|
"rocm:" + self.gpu_version.strip("rocm") if self.gpu_version.startswith("rocm") else self.gpu_version)
|
||||||
|
docker_distro_suffix = alt_docker_suffix if self.pydistro != "conda" else (
|
||||||
|
"cuda" if alt_docker_suffix.startswith("cuda") else "rocm")
|
||||||
|
return miniutils.quote("pytorch/" + docker_distro_prefix + "-" + docker_distro_suffix)
|
||||||
|
|
||||||
|
def get_name_prefix(self):
|
||||||
|
return "smoke" if self.smoke else "binary"
|
||||||
|
|
||||||
|
def gen_build_name(self, build_or_test, nightly):
|
||||||
|
|
||||||
|
parts = [self.get_name_prefix(), self.os] + self.gen_build_env_parms()
|
||||||
|
|
||||||
|
if nightly:
|
||||||
|
parts.append("nightly")
|
||||||
|
|
||||||
|
if self.libtorch_variant:
|
||||||
|
parts.append(self.libtorch_variant)
|
||||||
|
|
||||||
|
if not self.smoke:
|
||||||
|
parts.append(build_or_test)
|
||||||
|
|
||||||
|
joined = "_".join(parts)
|
||||||
|
return joined.replace(".", "_")
|
||||||
|
|
||||||
|
def gen_workflow_job(self, phase, upload_phase_dependency=None, nightly=False):
|
||||||
|
job_def = OrderedDict()
|
||||||
|
job_def["name"] = self.gen_build_name(phase, nightly)
|
||||||
|
job_def["build_environment"] = miniutils.quote(" ".join(self.gen_build_env_parms()))
|
||||||
|
if self.smoke:
|
||||||
|
job_def["requires"] = [
|
||||||
|
"update_s3_htmls",
|
||||||
|
]
|
||||||
|
job_def["filters"] = branch_filters.gen_filter_dict(
|
||||||
|
branches_list=["postnightly"],
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
filter_branch = r"/.*/"
|
||||||
|
job_def["filters"] = branch_filters.gen_filter_dict(
|
||||||
|
branches_list=[filter_branch],
|
||||||
|
tags_list=[branch_filters.RC_PATTERN],
|
||||||
|
)
|
||||||
|
if self.libtorch_variant:
|
||||||
|
job_def["libtorch_variant"] = miniutils.quote(self.libtorch_variant)
|
||||||
|
if phase == "test":
|
||||||
|
if not self.smoke:
|
||||||
|
job_def["requires"] = [self.gen_build_name("build", nightly)]
|
||||||
|
if not (self.smoke and self.os == "macos") and self.os != "windows":
|
||||||
|
job_def["docker_image"] = self.gen_docker_image()
|
||||||
|
|
||||||
|
# fix this. only works on cuda not rocm
|
||||||
|
if self.os != "windows" and self.gpu_version:
|
||||||
|
job_def["use_cuda_docker_runtime"] = miniutils.quote("1")
|
||||||
|
else:
|
||||||
|
if self.os == "linux" and phase != "upload":
|
||||||
|
job_def["docker_image"] = self.gen_docker_image()
|
||||||
|
|
||||||
|
if phase == "test":
|
||||||
|
if self.gpu_version:
|
||||||
|
if self.os == "windows":
|
||||||
|
job_def["executor"] = "windows-with-nvidia-gpu"
|
||||||
|
else:
|
||||||
|
job_def["resource_class"] = "gpu.medium"
|
||||||
|
|
||||||
|
os_name = miniutils.override(self.os, {"macos": "mac"})
|
||||||
|
job_name = "_".join([self.get_name_prefix(), os_name, phase])
|
||||||
|
return {job_name : job_def}
|
||||||
|
|
||||||
|
def gen_upload_job(self, phase, requires_dependency):
|
||||||
|
"""Generate binary_upload job for configuration
|
||||||
|
|
||||||
|
Output looks similar to:
|
||||||
|
|
||||||
|
- binary_upload:
|
||||||
|
name: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_upload
|
||||||
|
context: org-member
|
||||||
|
requires: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_test
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- nightly
|
||||||
|
tags:
|
||||||
|
only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
|
||||||
|
package_type: manywheel
|
||||||
|
upload_subfolder: cu113
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"binary_upload": OrderedDict({
|
||||||
|
"name": self.gen_build_name(phase, nightly=True),
|
||||||
|
"context": "org-member",
|
||||||
|
"requires": [self.gen_build_name(
|
||||||
|
requires_dependency,
|
||||||
|
nightly=True
|
||||||
|
)],
|
||||||
|
"filters": branch_filters.gen_filter_dict(
|
||||||
|
branches_list=["nightly"],
|
||||||
|
tags_list=[branch_filters.RC_PATTERN],
|
||||||
|
),
|
||||||
|
"package_type": self.pydistro,
|
||||||
|
"upload_subfolder": binary_build_data.get_processor_arch_name(
|
||||||
|
self.gpu_version,
|
||||||
|
),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_root(smoke, name):
|
||||||
|
|
||||||
|
return binary_build_data.TopLevelNode(
|
||||||
|
name,
|
||||||
|
binary_build_data.CONFIG_TREE_DATA,
|
||||||
|
smoke,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def gen_build_env_list(smoke):
|
||||||
|
|
||||||
|
root = get_root(smoke, "N/A")
|
||||||
|
config_list = conf_tree.dfs(root)
|
||||||
|
|
||||||
|
newlist = []
|
||||||
|
for c in config_list:
|
||||||
|
conf = Conf(
|
||||||
|
c.find_prop("os_name"),
|
||||||
|
c.find_prop("gpu"),
|
||||||
|
c.find_prop("package_format"),
|
||||||
|
[c.find_prop("pyver")],
|
||||||
|
c.find_prop("smoke") and not (c.find_prop("os_name") == "macos_arm64"), # don't test arm64
|
||||||
|
c.find_prop("libtorch_variant"),
|
||||||
|
c.find_prop("gcc_config_variant"),
|
||||||
|
c.find_prop("libtorch_config_variant"),
|
||||||
|
)
|
||||||
|
newlist.append(conf)
|
||||||
|
|
||||||
|
return newlist
|
||||||
|
|
||||||
|
def predicate_exclude_macos(config):
|
||||||
|
return config.os == "linux" or config.os == "windows"
|
||||||
|
|
||||||
|
def get_nightly_uploads():
|
||||||
|
configs = gen_build_env_list(False)
|
||||||
|
mylist = []
|
||||||
|
for conf in configs:
|
||||||
|
phase_dependency = "test" if predicate_exclude_macos(conf) else "build"
|
||||||
|
mylist.append(conf.gen_upload_job("upload", phase_dependency))
|
||||||
|
|
||||||
|
return mylist
|
||||||
|
|
||||||
|
def get_post_upload_jobs():
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"update_s3_htmls": {
|
||||||
|
"name": "update_s3_htmls",
|
||||||
|
"context": "org-member",
|
||||||
|
"filters": branch_filters.gen_filter_dict(
|
||||||
|
branches_list=["postnightly"],
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def get_nightly_tests():
|
||||||
|
|
||||||
|
configs = gen_build_env_list(False)
|
||||||
|
filtered_configs = filter(predicate_exclude_macos, configs)
|
||||||
|
|
||||||
|
tests = []
|
||||||
|
for conf_options in filtered_configs:
|
||||||
|
yaml_item = conf_options.gen_workflow_job("test", nightly=True)
|
||||||
|
tests.append(yaml_item)
|
||||||
|
|
||||||
|
return tests
|
||||||
|
|
||||||
|
|
||||||
|
def get_jobs(toplevel_key, smoke):
|
||||||
|
jobs_list = []
|
||||||
|
configs = gen_build_env_list(smoke)
|
||||||
|
phase = "build" if toplevel_key == "binarybuilds" else "test"
|
||||||
|
for build_config in configs:
|
||||||
|
# don't test for macos_arm64 as it's cross compiled
|
||||||
|
if phase != "test" or build_config.os != "macos_arm64":
|
||||||
|
jobs_list.append(build_config.gen_workflow_job(phase, nightly=True))
|
||||||
|
|
||||||
|
return jobs_list
|
||||||
|
|
||||||
|
|
||||||
|
def get_binary_build_jobs():
|
||||||
|
return get_jobs("binarybuilds", False)
|
||||||
|
|
||||||
|
|
||||||
|
def get_binary_smoke_test_jobs():
|
||||||
|
return get_jobs("binarysmoketests", True)
|
||||||
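For orientation, a minimal sketch of how `Conf` above composes a binary job name; it assumes `cimodel` is importable (e.g. when run from `.circleci`, as `generate_config_yml.py` does), and the expected output follows the example names in the `gen_upload_job` docstring.

```python
# Sketch: compose a nightly manywheel test-job name with the Conf class above.
from cimodel.data.binary_build_definitions import Conf

conf = Conf(
    os="linux",
    gpu_version="cuda113",
    pydistro="manywheel",
    parms=["3.7m"],
    smoke=False,
    libtorch_variant=None,
    gcc_config_variant="devtoolset7",
    libtorch_config_variant=None,
)
print(conf.gen_build_name("test", nightly=True))
# binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_test
```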
23 .circleci/cimodel/data/dimensions.py (new file)
@@ -0,0 +1,23 @@

PHASES = ["build", "test"]

CUDA_VERSIONS = [
    "102",
    "113",
    "116",
]

ROCM_VERSIONS = [
    "4.3.1",
    "4.5.2",
]

ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]

GPU_VERSIONS = [None] + ["cuda" + v for v in CUDA_VERSIONS] + ROCM_VERSION_LABELS

STANDARD_PYTHON_VERSIONS = [
    "3.7",
    "3.8",
    "3.9",
    "3.10"
]
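The derived constants above expand to a small, fixed set of labels; a quick check, assuming `cimodel` is importable:

```python
import cimodel.data.dimensions as dimensions

print(dimensions.ROCM_VERSION_LABELS)
# ['rocm4.3.1', 'rocm4.5.2']
print(dimensions.GPU_VERSIONS)
# [None, 'cuda102', 'cuda113', 'cuda116', 'rocm4.3.1', 'rocm4.5.2']
```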
280 .circleci/cimodel/data/pytorch_build_data.py (new file)
@@ -0,0 +1,280 @@

from cimodel.lib.conf_tree import ConfigNode


CONFIG_TREE_DATA = [
]


def get_major_pyver(dotted_version):
    parts = dotted_version.split(".")
    return "py" + parts[0]


class TreeConfigNode(ConfigNode):
    def __init__(self, parent, node_name, subtree):
        super(TreeConfigNode, self).__init__(parent, self.modify_label(node_name))
        self.subtree = subtree
        self.init2(node_name)

    def modify_label(self, label):
        return label

    def init2(self, node_name):
        pass

    def get_children(self):
        return [self.child_constructor()(self, k, v) for (k, v) in self.subtree]


class TopLevelNode(TreeConfigNode):
    def __init__(self, node_name, subtree):
        super(TopLevelNode, self).__init__(None, node_name, subtree)

    # noinspection PyMethodMayBeStatic
    def child_constructor(self):
        return DistroConfigNode


class DistroConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["distro_name"] = node_name

    def child_constructor(self):
        distro = self.find_prop("distro_name")

        next_nodes = {
            "xenial": XenialCompilerConfigNode,
            "bionic": BionicCompilerConfigNode,
        }
        return next_nodes[distro]


class PyVerConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["pyver"] = node_name
        self.props["abbreviated_pyver"] = get_major_pyver(node_name)
        if node_name == "3.9":
            self.props["abbreviated_pyver"] = "py3.9"

    # noinspection PyMethodMayBeStatic
    def child_constructor(self):
        return ExperimentalFeatureConfigNode


class ExperimentalFeatureConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["experimental_feature"] = node_name

    def child_constructor(self):
        experimental_feature = self.find_prop("experimental_feature")

        next_nodes = {
            "asan": AsanConfigNode,
            "xla": XlaConfigNode,
            "mps": MPSConfigNode,
            "vulkan": VulkanConfigNode,
            "parallel_tbb": ParallelTBBConfigNode,
            "crossref": CrossRefConfigNode,
            "parallel_native": ParallelNativeConfigNode,
            "onnx": ONNXConfigNode,
            "libtorch": LibTorchConfigNode,
            "important": ImportantConfigNode,
            "build_only": BuildOnlyConfigNode,
            "shard_test": ShardTestConfigNode,
            "cuda_gcc_override": CudaGccOverrideConfigNode,
            "pure_torch": PureTorchConfigNode,
            "slow_gradcheck": SlowGradcheckConfigNode,
        }
        return next_nodes[experimental_feature]


class SlowGradcheckConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["is_slow_gradcheck"] = True

    def child_constructor(self):
        return ExperimentalFeatureConfigNode


class PureTorchConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "PURE_TORCH=" + str(label)

    def init2(self, node_name):
        self.props["is_pure_torch"] = node_name

    def child_constructor(self):
        return ImportantConfigNode


class XlaConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "XLA=" + str(label)

    def init2(self, node_name):
        self.props["is_xla"] = node_name

    def child_constructor(self):
        return ImportantConfigNode


class MPSConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "MPS=" + str(label)

    def init2(self, node_name):
        self.props["is_mps"] = node_name

    def child_constructor(self):
        return ImportantConfigNode


class AsanConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "Asan=" + str(label)

    def init2(self, node_name):
        self.props["is_asan"] = node_name

    def child_constructor(self):
        return ExperimentalFeatureConfigNode


class ONNXConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "Onnx=" + str(label)

    def init2(self, node_name):
        self.props["is_onnx"] = node_name

    def child_constructor(self):
        return ImportantConfigNode


class VulkanConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "Vulkan=" + str(label)

    def init2(self, node_name):
        self.props["is_vulkan"] = node_name

    def child_constructor(self):
        return ImportantConfigNode


class ParallelTBBConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "PARALLELTBB=" + str(label)

    def init2(self, node_name):
        self.props["parallel_backend"] = "paralleltbb"

    def child_constructor(self):
        return ImportantConfigNode


class CrossRefConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["is_crossref"] = node_name

    def child_constructor(self):
        return ImportantConfigNode


class ParallelNativeConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "PARALLELNATIVE=" + str(label)

    def init2(self, node_name):
        self.props["parallel_backend"] = "parallelnative"

    def child_constructor(self):
        return ImportantConfigNode


class LibTorchConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "BUILD_TEST_LIBTORCH=" + str(label)

    def init2(self, node_name):
        self.props["is_libtorch"] = node_name

    def child_constructor(self):
        return ExperimentalFeatureConfigNode


class CudaGccOverrideConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["cuda_gcc_override"] = node_name

    def child_constructor(self):
        return ExperimentalFeatureConfigNode


class BuildOnlyConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["build_only"] = node_name

    def child_constructor(self):
        return ExperimentalFeatureConfigNode


class ShardTestConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["shard_test"] = node_name

    def child_constructor(self):
        return ImportantConfigNode


class ImportantConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "IMPORTANT=" + str(label)

    def init2(self, node_name):
        self.props["is_important"] = node_name

    def get_children(self):
        return []


class XenialCompilerConfigNode(TreeConfigNode):

    def modify_label(self, label):
        return label or "<unspecified>"

    def init2(self, node_name):
        self.props["compiler_name"] = node_name

    # noinspection PyMethodMayBeStatic
    def child_constructor(self):

        return XenialCompilerVersionConfigNode if self.props["compiler_name"] else PyVerConfigNode


class BionicCompilerConfigNode(TreeConfigNode):

    def modify_label(self, label):
        return label or "<unspecified>"

    def init2(self, node_name):
        self.props["compiler_name"] = node_name

    # noinspection PyMethodMayBeStatic
    def child_constructor(self):

        return BionicCompilerVersionConfigNode if self.props["compiler_name"] else PyVerConfigNode


class XenialCompilerVersionConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["compiler_version"] = node_name

    # noinspection PyMethodMayBeStatic
    def child_constructor(self):
        return PyVerConfigNode


class BionicCompilerVersionConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["compiler_version"] = node_name

    # noinspection PyMethodMayBeStatic
    def child_constructor(self):
        return PyVerConfigNode
380 .circleci/cimodel/data/pytorch_build_definitions.py (new file)
@@ -0,0 +1,380 @@

from collections import OrderedDict
from dataclasses import dataclass, field
from typing import List, Optional

import cimodel.data.dimensions as dimensions
import cimodel.lib.conf_tree as conf_tree
import cimodel.lib.miniutils as miniutils
from cimodel.data.pytorch_build_data import CONFIG_TREE_DATA, TopLevelNode
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN
from cimodel.data.simple.util.docker_constants import gen_docker_image


@dataclass
class Conf:
    distro: str
    parms: List[str]
    parms_list_ignored_for_docker_image: Optional[List[str]] = None
    pyver: Optional[str] = None
    cuda_version: Optional[str] = None
    rocm_version: Optional[str] = None
    # TODO expand this to cover all the USE_* that we want to test for
    #  tensorrt, leveldb, lmdb, redis, opencv, mkldnn, ideep, etc.
    # (from https://github.com/pytorch/pytorch/pull/17323#discussion_r259453608)
    is_xla: bool = False
    is_vulkan: bool = False
    is_pure_torch: bool = False
    restrict_phases: Optional[List[str]] = None
    gpu_resource: Optional[str] = None
    dependent_tests: List = field(default_factory=list)
    parent_build: Optional["Conf"] = None
    is_libtorch: bool = False
    is_important: bool = False
    parallel_backend: Optional[str] = None
    build_only: bool = False

    @staticmethod
    def is_test_phase(phase):
        return "test" in phase

    # TODO: Eliminate the special casing for docker paths
    # In the short term, we *will* need to support special casing as docker images are merged for caffe2 and pytorch
    def get_parms(self, for_docker):
        leading = []
        # We just don't run non-important jobs on pull requests;
        # previously we also named them in a way to make it obvious
        # if self.is_important and not for_docker:
        #    leading.append("AAA")
        leading.append("pytorch")
        if self.is_xla and not for_docker:
            leading.append("xla")
        if self.is_vulkan and not for_docker:
            leading.append("vulkan")
        if self.is_libtorch and not for_docker:
            leading.append("libtorch")
        if self.is_pure_torch and not for_docker:
            leading.append("pure_torch")
        if self.parallel_backend is not None and not for_docker:
            leading.append(self.parallel_backend)

        cuda_parms = []
        if self.cuda_version:
            cudnn = "cudnn8" if self.cuda_version.startswith("11.") else "cudnn7"
            cuda_parms.extend(["cuda" + self.cuda_version, cudnn])
        if self.rocm_version:
            cuda_parms.extend([f"rocm{self.rocm_version}"])
        result = leading + ["linux", self.distro] + cuda_parms + self.parms
        if not for_docker and self.parms_list_ignored_for_docker_image is not None:
            result = result + self.parms_list_ignored_for_docker_image
        return result

    def gen_docker_image_path(self):
        parms_source = self.parent_build or self
        base_build_env_name = "-".join(parms_source.get_parms(True))
        image_name, _ = gen_docker_image(base_build_env_name)
        return miniutils.quote(image_name)

    def gen_docker_image_requires(self):
        parms_source = self.parent_build or self
        base_build_env_name = "-".join(parms_source.get_parms(True))
        _, requires = gen_docker_image(base_build_env_name)
        return miniutils.quote(requires)

    def get_build_job_name_pieces(self, build_or_test):
        return self.get_parms(False) + [build_or_test]

    def gen_build_name(self, build_or_test):
        return (
            ("_".join(map(str, self.get_build_job_name_pieces(build_or_test))))
            .replace(".", "_")
            .replace("-", "_")
        )

    def get_dependents(self):
        return self.dependent_tests or []

    def gen_workflow_params(self, phase):
        parameters = OrderedDict()
        build_job_name_pieces = self.get_build_job_name_pieces(phase)

        build_env_name = "-".join(map(str, build_job_name_pieces))
        parameters["build_environment"] = miniutils.quote(build_env_name)
        parameters["docker_image"] = self.gen_docker_image_path()
        if Conf.is_test_phase(phase) and self.gpu_resource:
            parameters["use_cuda_docker_runtime"] = miniutils.quote("1")
        if Conf.is_test_phase(phase):
            resource_class = "large"
            if self.gpu_resource:
                resource_class = "gpu." + self.gpu_resource
            if self.rocm_version is not None:
                resource_class = "pytorch/amd-gpu"
            parameters["resource_class"] = resource_class
        if phase == "build" and self.rocm_version is not None:
            parameters["resource_class"] = "xlarge"
        if hasattr(self, 'filters'):
            parameters['filters'] = self.filters
        if self.build_only:
            parameters['build_only'] = miniutils.quote(str(int(True)))
        return parameters

    def gen_workflow_job(self, phase):
        job_def = OrderedDict()
        job_def["name"] = self.gen_build_name(phase)

        if Conf.is_test_phase(phase):

            # TODO When merging the caffe2 and pytorch jobs, it might be convenient for a while to make a
            # caffe2 test job dependent on a pytorch build job. This way we could quickly dedup the repeated
            # build of pytorch in the caffe2 build job, and just run the caffe2 tests off of a completed
            # pytorch build job (from https://github.com/pytorch/pytorch/pull/17323#discussion_r259452641)

            dependency_build = self.parent_build or self
            job_def["requires"] = [dependency_build.gen_build_name("build")]
            job_name = "pytorch_linux_test"
        else:
            job_name = "pytorch_linux_build"
            job_def["requires"] = [self.gen_docker_image_requires()]

        if not self.is_important:
            job_def["filters"] = gen_filter_dict()
        job_def.update(self.gen_workflow_params(phase))

        return {job_name: job_def}


# TODO This is a hack to special case some configs just for the workflow list
class HiddenConf(object):
    def __init__(self, name, parent_build=None, filters=None):
        self.name = name
        self.parent_build = parent_build
        self.filters = filters

    def gen_workflow_job(self, phase):
        return {
            self.gen_build_name(phase): {
                "requires": [self.parent_build.gen_build_name("build")],
                "filters": self.filters,
            }
        }

    def gen_build_name(self, _):
        return self.name


class DocPushConf(object):
    def __init__(self, name, parent_build=None, branch="master"):
        self.name = name
        self.parent_build = parent_build
        self.branch = branch

    def gen_workflow_job(self, phase):
        return {
            "pytorch_doc_push": {
                "name": self.name,
                "branch": self.branch,
                "requires": [self.parent_build],
                "context": "org-member",
                "filters": gen_filter_dict(branches_list=["nightly"],
                                           tags_list=RC_PATTERN)
            }
        }


def gen_docs_configs(xenial_parent_config):
    configs = []

    configs.append(
        HiddenConf(
            "pytorch_python_doc_build",
            parent_build=xenial_parent_config,
            filters=gen_filter_dict(branches_list=["master", "main", "nightly"],
                                    tags_list=RC_PATTERN),
        )
    )
    configs.append(
        DocPushConf(
            "pytorch_python_doc_push",
            parent_build="pytorch_python_doc_build",
            branch="site",
        )
    )

    configs.append(
        HiddenConf(
            "pytorch_cpp_doc_build",
            parent_build=xenial_parent_config,
            filters=gen_filter_dict(branches_list=["master", "main", "nightly"],
                                    tags_list=RC_PATTERN),
        )
    )
    configs.append(
        DocPushConf(
            "pytorch_cpp_doc_push",
            parent_build="pytorch_cpp_doc_build",
            branch="master",
        )
    )
    return configs


def get_root():
    return TopLevelNode("PyTorch Builds", CONFIG_TREE_DATA)


def gen_tree():
    root = get_root()
    configs_list = conf_tree.dfs(root)
    return configs_list


def instantiate_configs(only_slow_gradcheck):

    config_list = []

    root = get_root()
    found_configs = conf_tree.dfs(root)
    for fc in found_configs:

        restrict_phases = None
        distro_name = fc.find_prop("distro_name")
        compiler_name = fc.find_prop("compiler_name")
        compiler_version = fc.find_prop("compiler_version")
        is_xla = fc.find_prop("is_xla") or False
        is_asan = fc.find_prop("is_asan") or False
        is_crossref = fc.find_prop("is_crossref") or False
        is_onnx = fc.find_prop("is_onnx") or False
        is_pure_torch = fc.find_prop("is_pure_torch") or False
        is_vulkan = fc.find_prop("is_vulkan") or False
        is_slow_gradcheck = fc.find_prop("is_slow_gradcheck") or False
        parms_list_ignored_for_docker_image = []

        if only_slow_gradcheck ^ is_slow_gradcheck:
            continue

        python_version = None
        if compiler_name == "cuda" or compiler_name == "android":
            python_version = fc.find_prop("pyver")
            parms_list = [fc.find_prop("abbreviated_pyver")]
        else:
            parms_list = ["py" + fc.find_prop("pyver")]

        cuda_version = None
        rocm_version = None
        if compiler_name == "cuda":
            cuda_version = fc.find_prop("compiler_version")

        elif compiler_name == "rocm":
            rocm_version = fc.find_prop("compiler_version")
            restrict_phases = ["build", "test1", "test2", "caffe2_test"]

        elif compiler_name == "android":
            android_ndk_version = fc.find_prop("compiler_version")
            # TODO: do we need clang to compile host binaries like protoc?
            parms_list.append("clang5")
            parms_list.append("android-ndk-" + android_ndk_version)
            android_abi = fc.find_prop("android_abi")
            parms_list_ignored_for_docker_image.append(android_abi)
            restrict_phases = ["build"]

        elif compiler_name:
            gcc_version = compiler_name + (fc.find_prop("compiler_version") or "")
            parms_list.append(gcc_version)

        if is_asan:
            parms_list.append("asan")
            python_version = fc.find_prop("pyver")
            parms_list[0] = fc.find_prop("abbreviated_pyver")

        if is_crossref:
            parms_list_ignored_for_docker_image.append("crossref")

        if is_onnx:
            parms_list.append("onnx")
            python_version = fc.find_prop("pyver")
            parms_list[0] = fc.find_prop("abbreviated_pyver")
            restrict_phases = ["build", "ort_test1", "ort_test2"]

        if cuda_version:
            cuda_gcc_version = fc.find_prop("cuda_gcc_override") or "gcc7"
            parms_list.append(cuda_gcc_version)

        is_libtorch = fc.find_prop("is_libtorch") or False
        is_important = fc.find_prop("is_important") or False
        parallel_backend = fc.find_prop("parallel_backend") or None
        build_only = fc.find_prop("build_only") or False
        shard_test = fc.find_prop("shard_test") or False
        # TODO: fix pure_torch python test packaging issue.
        if shard_test:
            restrict_phases = ["build"] if restrict_phases is None else restrict_phases
            restrict_phases.extend(["test1", "test2"])
        if build_only or is_pure_torch:
            restrict_phases = ["build"]

        if is_slow_gradcheck:
            parms_list_ignored_for_docker_image.append("old")
            parms_list_ignored_for_docker_image.append("gradcheck")

        gpu_resource = None
        if cuda_version and cuda_version != "10":
            gpu_resource = "medium"

        c = Conf(
            distro_name,
            parms_list,
            parms_list_ignored_for_docker_image,
            python_version,
            cuda_version,
            rocm_version,
            is_xla,
            is_vulkan,
            is_pure_torch,
            restrict_phases,
            gpu_resource,
            is_libtorch=is_libtorch,
            is_important=is_important,
            parallel_backend=parallel_backend,
            build_only=build_only,
        )

        # run docs builds on "pytorch-linux-xenial-py3.7-gcc5.4". Docs builds
        # should run on a CPU-only build that runs on all PRs.
        # XXX should this be updated to a more modern build?
        if (
            distro_name == "xenial"
            and fc.find_prop("pyver") == "3.7"
            and cuda_version is None
            and parallel_backend is None
            and not is_vulkan
            and not is_pure_torch
            and compiler_name == "gcc"
            and fc.find_prop("compiler_version") == "5.4"
        ):
            c.filters = gen_filter_dict(branches_list=r"/.*/",
                                        tags_list=RC_PATTERN)
            c.dependent_tests = gen_docs_configs(c)

        config_list.append(c)

    return config_list


def get_workflow_jobs(only_slow_gradcheck=False):

    config_list = instantiate_configs(only_slow_gradcheck)

    x = []
    for conf_options in config_list:

        phases = conf_options.restrict_phases or dimensions.PHASES

        for phase in phases:

            # TODO why does this not have a test?
            if Conf.is_test_phase(phase) and conf_options.cuda_version == "10":
                continue

            x.append(conf_options.gen_workflow_job(phase))

        # TODO convert to recursion
        for conf in conf_options.get_dependents():
            x.append(conf.gen_workflow_job("test"))

    return x
28 .circleci/cimodel/data/simple/anaconda_prune_defintions.py (new file)
@@ -0,0 +1,28 @@

from collections import OrderedDict

from cimodel.data.simple.util.branch_filters import gen_filter_dict
from cimodel.lib.miniutils import quote


CHANNELS_TO_PRUNE = ["pytorch-nightly", "pytorch-test"]
PACKAGES_TO_PRUNE = "pytorch torchvision torchaudio torchtext ignite torchcsprng"


def gen_workflow_job(channel: str):
    return OrderedDict(
        {
            "anaconda_prune": OrderedDict(
                {
                    "name": f"anaconda-prune-{channel}",
                    "context": quote("org-member"),
                    "packages": quote(PACKAGES_TO_PRUNE),
                    "channel": channel,
                    "filters": gen_filter_dict(branches_list=["postnightly"]),
                }
            )
        }
    )


def get_workflow_jobs():
    return [gen_workflow_job(channel) for channel in CHANNELS_TO_PRUNE]
39 .circleci/cimodel/data/simple/docker_definitions.py (new file)
@@ -0,0 +1,39 @@

from collections import OrderedDict

from cimodel.lib.miniutils import quote
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN


# NOTE: All hardcoded docker image builds have been migrated to GHA
IMAGE_NAMES = [
]

# This entry should be an element from the list above
# This should contain the image matching the "slow_gradcheck" entry in
# pytorch_build_data.py
SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"

def get_workflow_jobs(images=IMAGE_NAMES, only_slow_gradcheck=False):
    """Generates a list of docker image build definitions"""
    ret = []
    for image_name in images:
        if image_name.startswith('docker-'):
            image_name = image_name.lstrip('docker-')
        if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
            continue

        parameters = OrderedDict({
            "name": quote(f"docker-{image_name}"),
            "image_name": quote(image_name),
        })
        if image_name == "pytorch-linux-xenial-py3.7-gcc5.4":
            # pushing documentation on tags requires CircleCI to also
            # build all the dependencies on tags, including this docker image
            parameters['filters'] = gen_filter_dict(branches_list=r"/.*/",
                                                    tags_list=RC_PATTERN)
        ret.append(OrderedDict(
            {
                "docker_build_job": parameters
            }
        ))
    return ret
88 .circleci/cimodel/data/simple/ios_definitions.py (new file)
@@ -0,0 +1,88 @@

from cimodel.data.simple.util.versions import MultiPartVersion
import cimodel.lib.miniutils as miniutils

XCODE_VERSION = MultiPartVersion([12, 5, 1])


class ArchVariant:
    def __init__(self, name, custom_build_name=""):
        self.name = name
        self.custom_build_name = custom_build_name

    def render(self):
        extra_parts = [self.custom_build_name] if len(self.custom_build_name) > 0 else []
        return "_".join([self.name] + extra_parts)


def get_platform(arch_variant_name):
    return "SIMULATOR" if arch_variant_name == "x86_64" else "OS"


class IOSJob:
    def __init__(self, xcode_version, arch_variant, is_org_member_context=True, extra_props=None):
        self.xcode_version = xcode_version
        self.arch_variant = arch_variant
        self.is_org_member_context = is_org_member_context
        self.extra_props = extra_props

    def gen_name_parts(self, with_version_dots):

        version_parts = self.xcode_version.render_dots_or_parts(with_version_dots)
        build_variant_suffix = "_".join([self.arch_variant.render(), "build"])

        return [
            "pytorch",
            "ios",
        ] + version_parts + [
            build_variant_suffix,
        ]

    def gen_job_name(self):
        return "_".join(self.gen_name_parts(False))

    def gen_tree(self):

        platform_name = get_platform(self.arch_variant.name)

        props_dict = {
            "build_environment": "-".join(self.gen_name_parts(True)),
            "ios_arch": self.arch_variant.name,
            "ios_platform": platform_name,
            "name": self.gen_job_name(),
        }

        if self.is_org_member_context:
            props_dict["context"] = "org-member"

        if self.extra_props:
            props_dict.update(self.extra_props)

        return [{"pytorch_ios_build": props_dict}]


WORKFLOW_DATA = [
    IOSJob(XCODE_VERSION, ArchVariant("x86_64"), is_org_member_context=False, extra_props={
        "lite_interpreter": miniutils.quote(str(int(True)))}),
    IOSJob(XCODE_VERSION, ArchVariant("x86_64", "full_jit"), is_org_member_context=False, extra_props={
        "lite_interpreter": miniutils.quote(str(int(False)))}),
    IOSJob(XCODE_VERSION, ArchVariant("arm64"), extra_props={
        "lite_interpreter": miniutils.quote(str(int(True)))}),
    IOSJob(XCODE_VERSION, ArchVariant("arm64", "metal"), extra_props={
        "use_metal": miniutils.quote(str(int(True))),
        "lite_interpreter": miniutils.quote(str(int(True)))}),
    IOSJob(XCODE_VERSION, ArchVariant("arm64", "full_jit"), extra_props={
        "lite_interpreter": miniutils.quote(str(int(False)))}),
    IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom"), extra_props={
        "op_list": "mobilenetv2.yaml",
        "lite_interpreter": miniutils.quote(str(int(True)))}),
    IOSJob(XCODE_VERSION, ArchVariant("x86_64", "coreml"), is_org_member_context=False, extra_props={
        "use_coreml": miniutils.quote(str(int(True))),
        "lite_interpreter": miniutils.quote(str(int(True)))}),
    IOSJob(XCODE_VERSION, ArchVariant("arm64", "coreml"), extra_props={
        "use_coreml": miniutils.quote(str(int(True))),
        "lite_interpreter": miniutils.quote(str(int(True)))}),
]


def get_workflow_jobs():
    return [item.gen_tree() for item in WORKFLOW_DATA]
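A short sketch of how the Xcode version and arch variant above turn into a job name and a `build_environment` string (this assumes `cimodel` is importable, as when the config generator runs from `.circleci`):

```python
from cimodel.data.simple.ios_definitions import IOSJob, ArchVariant, XCODE_VERSION

job = IOSJob(XCODE_VERSION, ArchVariant("x86_64"), is_org_member_context=False)
print(job.gen_job_name())                  # pytorch_ios_12_5_1_x86_64_build
print("-".join(job.gen_name_parts(True)))  # pytorch-ios-12.5.1-x86_64_build
```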
52 .circleci/cimodel/data/simple/macos_definitions.py (new file)
@@ -0,0 +1,52 @@

class MacOsJob:
    def __init__(self, os_version, is_build=False, is_test=False, extra_props=tuple()):
        # extra_props is tuple type, because mutable data structures for argument defaults
        # is not recommended.
        self.os_version = os_version
        self.is_build = is_build
        self.is_test = is_test
        self.extra_props = dict(extra_props)

    def gen_tree(self):
        non_phase_parts = ["pytorch", "macos", self.os_version, "py3"]

        extra_name_list = [name for name, exist in self.extra_props.items() if exist]
        full_job_name_list = non_phase_parts + extra_name_list + [
            'build' if self.is_build else None,
            'test' if self.is_test else None,
        ]

        full_job_name = "_".join(list(filter(None, full_job_name_list)))

        test_build_dependency = "_".join(non_phase_parts + ["build"])
        extra_dependencies = [test_build_dependency] if self.is_test else []
        job_dependencies = extra_dependencies

        # Yes we name the job after itself, it needs a non-empty value in here
        # for the YAML output to work.
        props_dict = {"requires": job_dependencies, "name": full_job_name}

        return [{full_job_name: props_dict}]


WORKFLOW_DATA = [
    MacOsJob("10_15", is_build=True),
    MacOsJob("10_13", is_build=True),
    MacOsJob(
        "10_13",
        is_build=False,
        is_test=True,
    ),
    MacOsJob(
        "10_13",
        is_build=True,
        is_test=True,
        extra_props=tuple({
            "lite_interpreter": True
        }.items()),
    )
]


def get_workflow_jobs():
    return [item.gen_tree() for item in WORKFLOW_DATA]
53 .circleci/cimodel/data/simple/mobile_definitions.py (new file)
@@ -0,0 +1,53 @@

"""
PyTorch Mobile PR builds (use linux host toolchain + mobile build options)
"""

import cimodel.lib.miniutils as miniutils
import cimodel.data.simple.util.branch_filters


class MobileJob:
    def __init__(
            self,
            docker_image,
            docker_requires,
            variant_parts,
            is_master_only=False):
        self.docker_image = docker_image
        self.docker_requires = docker_requires
        self.variant_parts = variant_parts
        self.is_master_only = is_master_only

    def gen_tree(self):
        non_phase_parts = [
            "pytorch",
            "linux",
            "xenial",
            "py3",
            "clang5",
            "mobile",
        ] + self.variant_parts

        full_job_name = "_".join(non_phase_parts)
        build_env_name = "-".join(non_phase_parts)

        props_dict = {
            "build_environment": build_env_name,
            "build_only": miniutils.quote(str(int(True))),
            "docker_image": self.docker_image,
            "requires": self.docker_requires,
            "name": full_job_name,
        }

        if self.is_master_only:
            props_dict["filters"] = cimodel.data.simple.util.branch_filters.gen_filter_dict()

        return [{"pytorch_linux_build": props_dict}]


WORKFLOW_DATA = [
]


def get_workflow_jobs():
    return [item.gen_tree() for item in WORKFLOW_DATA]
85 .circleci/cimodel/data/simple/nightly_ios.py (new file)
@@ -0,0 +1,85 @@

import cimodel.data.simple.ios_definitions as ios_definitions
import cimodel.lib.miniutils as miniutils


class IOSNightlyJob:
    def __init__(self,
                 variant,
                 is_full_jit=False,
                 is_upload=False):

        self.variant = variant
        self.is_full_jit = is_full_jit
        self.is_upload = is_upload

    def get_phase_name(self):
        return "upload" if self.is_upload else "build"

    def get_common_name_pieces(self, with_version_dots):

        extra_name_suffix = [self.get_phase_name()] if self.is_upload else []

        extra_name = ["full_jit"] if self.is_full_jit else []

        common_name_pieces = [
            "ios",
        ] + extra_name + [
        ] + ios_definitions.XCODE_VERSION.render_dots_or_parts(with_version_dots) + [
            "nightly",
            self.variant,
            "build",
        ] + extra_name_suffix

        return common_name_pieces

    def gen_job_name(self):
        return "_".join(["pytorch"] + self.get_common_name_pieces(False))

    def gen_tree(self):
        build_configs = BUILD_CONFIGS_FULL_JIT if self.is_full_jit else BUILD_CONFIGS
        extra_requires = [x.gen_job_name() for x in build_configs] if self.is_upload else []

        props_dict = {
            "build_environment": "-".join(["libtorch"] + self.get_common_name_pieces(True)),
            "requires": extra_requires,
            "context": "org-member",
            "filters": {"branches": {"only": "nightly"}},
        }

        if not self.is_upload:
            props_dict["ios_arch"] = self.variant
            props_dict["ios_platform"] = ios_definitions.get_platform(self.variant)
            props_dict["name"] = self.gen_job_name()
            props_dict["use_metal"] = miniutils.quote(str(int(True)))
            props_dict["use_coreml"] = miniutils.quote(str(int(True)))

        if self.is_full_jit:
            props_dict["lite_interpreter"] = miniutils.quote(str(int(False)))

        template_name = "_".join([
            "binary",
            "ios",
            self.get_phase_name(),
        ])

        return [{template_name: props_dict}]


BUILD_CONFIGS = [
    IOSNightlyJob("x86_64"),
    IOSNightlyJob("arm64"),
]

BUILD_CONFIGS_FULL_JIT = [
    IOSNightlyJob("x86_64", is_full_jit=True),
    IOSNightlyJob("arm64", is_full_jit=True),
]

WORKFLOW_DATA = BUILD_CONFIGS + BUILD_CONFIGS_FULL_JIT + [
    IOSNightlyJob("binary", is_full_jit=False, is_upload=True),
    IOSNightlyJob("binary", is_full_jit=True, is_upload=True),
]


def get_workflow_jobs():
    return [item.gen_tree() for item in WORKFLOW_DATA]
28 .circleci/cimodel/data/simple/util/branch_filters.py (new file)
@@ -0,0 +1,28 @@

NON_PR_BRANCH_LIST = [
    "main",
    "master",
    r"/ci-all\/.*/",
    r"/release\/.*/",
]

PR_BRANCH_LIST = [
    r"/gh\/.*\/head/",
    r"/pull\/.*/",
]

RC_PATTERN = r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/"

def gen_filter_dict(
        branches_list=NON_PR_BRANCH_LIST,
        tags_list=None
):
    """Generates a filter dictionary for use with CircleCI's job filter"""
    filter_dict = {
        "branches": {
            "only": branches_list,
        },
    }

    if tags_list is not None:
        filter_dict["tags"] = {"only": tags_list}
    return filter_dict
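For reference, the filter dictionaries produced by `gen_filter_dict` look like this (a small sketch, assuming `cimodel` is importable):

```python
from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN

# Default: run only on non-PR branches.
print(gen_filter_dict())
# {'branches': {'only': ['main', 'master', '/ci-all\\/.*/', '/release\\/.*/']}}

# Nightly branch plus release-candidate tags.
print(gen_filter_dict(branches_list=["nightly"], tags_list=[RC_PATTERN]))
# {'branches': {'only': ['nightly']}, 'tags': {'only': ['/v[0-9]+(\\.[0-9]+)*-rc[0-9]+/']}}
```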
33 .circleci/cimodel/data/simple/util/docker_constants.py (new file)
@@ -0,0 +1,33 @@

AWS_DOCKER_HOST = "308535385114.dkr.ecr.us-east-1.amazonaws.com"

def gen_docker_image(container_type):
    return (
        "/".join([AWS_DOCKER_HOST, "pytorch", container_type]),
        f"docker-{container_type}",
    )

def gen_docker_image_requires(image_name):
    return [f"docker-{image_name}"]


DOCKER_IMAGE_BASIC, DOCKER_REQUIREMENT_BASE = gen_docker_image(
    "pytorch-linux-xenial-py3.7-gcc5.4"
)

DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
    "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
)

DOCKER_IMAGE_GCC7, DOCKER_REQUIREMENT_GCC7 = gen_docker_image(
    "pytorch-linux-xenial-py3.7-gcc7"
)


def gen_mobile_docker(specifier):
    container_type = "pytorch-linux-xenial-py3-clang5-" + specifier
    return gen_docker_image(container_type)


DOCKER_IMAGE_ASAN, DOCKER_REQUIREMENT_ASAN = gen_mobile_docker("asan")

DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK = gen_mobile_docker("android-ndk-r19c")
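`gen_docker_image` returns both the full ECR image path and the name of the `docker_build_job` a consumer must depend on; a quick sketch (assuming `cimodel` is importable):

```python
from cimodel.data.simple.util.docker_constants import gen_docker_image, gen_mobile_docker

print(gen_docker_image("pytorch-linux-xenial-py3.7-gcc5.4"))
# ('308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4',
#  'docker-pytorch-linux-xenial-py3.7-gcc5.4')

print(gen_mobile_docker("asan")[1])
# docker-pytorch-linux-xenial-py3-clang5-asan
```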
34 .circleci/cimodel/data/simple/util/versions.py (new file)
@@ -0,0 +1,34 @@

class MultiPartVersion:
    def __init__(self, parts, prefix=""):
        self.parts = parts
        self.prefix = prefix

    def prefixed_parts(self):
        """
        Prepends the first element of the version list
        with the prefix string.
        """
        if self.parts:
            return [self.prefix + str(self.parts[0])] + [str(part) for part in self.parts[1:]]
        else:
            return [self.prefix]

    def render_dots(self):
        return ".".join(self.prefixed_parts())

    def render_dots_or_parts(self, with_dots):
        if with_dots:
            return [self.render_dots()]
        else:
            return self.prefixed_parts()


class CudaVersion(MultiPartVersion):
    def __init__(self, major, minor):
        self.major = major
        self.minor = minor

        super().__init__([self.major, self.minor], "cuda")

    def __str__(self):
        return f"{self.major}.{self.minor}"
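The two rendering modes above feed different places: underscore-joined parts for job names, dotted strings for `build_environment` values. A minimal sketch (assuming `cimodel` is importable):

```python
from cimodel.data.simple.util.versions import MultiPartVersion, CudaVersion

xcode = MultiPartVersion([12, 5, 1])
print(xcode.render_dots_or_parts(False))  # ['12', '5', '1'] -> joined with "_" in job names
print(xcode.render_dots_or_parts(True))   # ['12.5.1']       -> used in build_environment strings

cuda = CudaVersion(11, 3)
print(cuda.render_dots())  # cuda11.3
print(str(cuda))           # 11.3
```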
107 .circleci/cimodel/lib/conf_tree.py (new file)
@@ -0,0 +1,107 @@

from dataclasses import dataclass, field
from typing import Optional, Dict


def X(val):
    """
    Compact way to write a leaf node
    """
    return val, []


def XImportant(name):
    """Compact way to write an important (run on PRs) leaf node"""
    return (name, [("important", [X(True)])])


@dataclass
class Ver:
    """
    Represents a product with a version number
    """
    name: str
    version: str = ""

    def __str__(self):
        return self.name + self.version


@dataclass
class ConfigNode:
    parent: Optional['ConfigNode']
    node_name: str
    props: Dict[str, str] = field(default_factory=dict)

    def get_label(self):
        return self.node_name

    # noinspection PyMethodMayBeStatic
    def get_children(self):
        return []

    def get_parents(self):
        return (self.parent.get_parents() + [self.parent.get_label()]) if self.parent else []

    def get_depth(self):
        return len(self.get_parents())

    def get_node_key(self):
        return "%".join(self.get_parents() + [self.get_label()])

    def find_prop(self, propname, searched=None):
        """
        Checks if its own dictionary has
        the property, otherwise asks parent node.
        """

        if searched is None:
            searched = []

        searched.append(self.node_name)

        if propname in self.props:
            return self.props[propname]
        elif self.parent:
            return self.parent.find_prop(propname, searched)
        else:
            # raise Exception('Property "%s" does not exist anywhere in the tree! Searched: %s' % (propname, searched))
            return None


def dfs_recurse(
        node,
        leaf_callback=lambda x: None,
        discovery_callback=lambda x, y, z: None,
        child_callback=lambda x, y: None,
        sibling_index=0,
        sibling_count=1):

    discovery_callback(node, sibling_index, sibling_count)

    node_children = node.get_children()
    if node_children:
        for i, child in enumerate(node_children):
            child_callback(node, child)

            dfs_recurse(
                child,
                leaf_callback,
                discovery_callback,
                child_callback,
                i,
                len(node_children),
            )
    else:
        leaf_callback(node)


def dfs(toplevel_config_node):

    config_list = []

    def leaf_callback(node):
        config_list.append(node)

    dfs_recurse(toplevel_config_node, leaf_callback)

    return config_list
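The `X`/`XImportant` helpers above are just a compact literal syntax for the `(name, children)` tuples that the tree nodes consume; for example (assuming `cimodel` is importable):

```python
from cimodel.lib.conf_tree import X, XImportant

print(X("5.4"))
# ('5.4', [])
print(XImportant("3.7"))
# ('3.7', [('important', [(True, [])])])
```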
10 .circleci/cimodel/lib/miniutils.py (new file)
@@ -0,0 +1,10 @@

def quote(s):
    return sandwich('"', s)


def sandwich(bread, jam):
    return bread + jam + bread


def override(word, substitutions):
    return substitutions.get(word, word)
52 .circleci/cimodel/lib/miniyaml.py (new file)
@@ -0,0 +1,52 @@

from collections import OrderedDict

import cimodel.lib.miniutils as miniutils


LIST_MARKER = "- "
INDENTATION_WIDTH = 2


def is_dict(data):
    return type(data) in [dict, OrderedDict]


def is_collection(data):
    return is_dict(data) or type(data) is list


def render(fh, data, depth, is_list_member=False):
    """
    PyYaml does not allow precise control over the quoting
    behavior, especially for merge references.
    Therefore, we use this custom YAML renderer.
    """

    indentation = " " * INDENTATION_WIDTH * depth

    if is_dict(data):

        tuples = list(data.items())
        if type(data) is not OrderedDict:
            tuples.sort()

        for i, (k, v) in enumerate(tuples):
            if not v:
                continue
            # If this dict is itself a list member, the first key gets prefixed with a list marker
            list_marker_prefix = LIST_MARKER if is_list_member and not i else ""

            trailing_whitespace = "\n" if is_collection(v) else " "
            fh.write(indentation + list_marker_prefix + k + ":" + trailing_whitespace)

            render(fh, v, depth + 1 + int(is_list_member))

    elif type(data) is list:
        for v in data:
            render(fh, v, depth, True)

    else:
        # use empty quotes to denote an empty string value instead of blank space
        modified_data = miniutils.quote(data) if data == "" else data
        list_member_prefix = indentation + LIST_MARKER if is_list_member else ""
        fh.write(list_member_prefix + str(modified_data) + "\n")
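A small usage sketch of the renderer above (assuming `cimodel` is importable): rendering an `OrderedDict` into the YAML fragment style used for `config.yml`.

```python
import io
from collections import OrderedDict

import cimodel.lib.miniyaml as miniyaml

buf = io.StringIO()
miniyaml.render(buf, OrderedDict([("jobs", ["build", "test"])]), depth=0)
print(buf.getvalue())
# jobs:
#   - build
#   - test
```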
@@ -2,11 +2,10 @@
 import os
 import sys

 import yaml

 # Need to import modules that lie on an upward-relative path
-sys.path.append(os.path.join(sys.path[0], ".."))
+sys.path.append(os.path.join(sys.path[0], '..'))

 import cimodel.lib.miniyaml as miniyaml
1732 .circleci/config.yml (generated, new file)
File diff suppressed because it is too large.
31 .circleci/docker/README.md (new file)
@@ -0,0 +1,31 @@

# Docker images for Jenkins

This directory contains everything needed to build the Docker images
that are used in our CI.

The Dockerfiles located in subdirectories are parameterized to
conditionally run build stages depending on build arguments passed to
`docker build`. This lets us use only a few Dockerfiles for many
images. The different configurations are identified by a freeform
string that we call a _build environment_. This string is persisted in
each image as the `BUILD_ENVIRONMENT` environment variable.

See `build.sh` for valid build environments (it's the giant switch).

Docker builds are now defined with `.circleci/cimodel/data/simple/docker_definitions.py`

## Contents

* `build.sh` -- dispatch script to launch all builds
* `common` -- scripts used to execute individual Docker build stages
* `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker

## Usage

```bash
# Build a specific image
./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest

# Set flags (see build.sh) and build image
sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
```
Some files were not shown because too many files have changed in this diff.