fix formatting CIRCLE_TAG when building docs (#67026 ) (#69876 )

Summary: Similar to pytorch/text#1416 (malfet, brianjo). The previous code failed when tags changed from `v0.9.0` to `v0.10.0`. I tested this offline; it would be nice to somehow actually tag the repo and see that this adds the correct documentation directory to the pytorch/pytorch.github.io repo. Pull Request resolved: https://github.com/pytorch/pytorch/pull/67026 Reviewed By: saketh-are Differential Revision: D31843381 Pulled By: malfet fbshipit-source-id: 21526ad9ed4c1751c2d7f6d621da305f166a7f55 Co-authored-by: mattip <matti.picus@gmail.com>
[release/1.10] Remove fgrad_input from slow_conv2d (#64280 ) (#69622 )
2025-11-03 23:45:05 +08:00 · 2021-12-14 09:24:18 -08:00 · 2021-12-10 11:42:03 -08:00 · 2021-12-10 11:41:40 -08:00 · 2021-12-09 08:59:45 -08:00 · 2021-12-09 07:09:08 -08:00
4430 changed files with 136909 additions and 410981 deletions
--- a/.azure_pipelines/job_templates/prepare-build-template.yml
+++ b/.azure_pipelines/job_templates/prepare-build-template.yml
@ -46,7 +46,7 @@ steps:
      curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
      curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
      copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
-      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output .\tmp_bin\randomtemp.exe
+      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
    displayName: Install sccache and randomtemp
    condition: not(eq(variables.CUDA_VERSION, ''))

--- a/.azure_pipelines/job_templates/set-environment-variables.yml
+++ b/.azure_pipelines/job_templates/set-environment-variables.yml
@ -120,7 +120,9 @@ steps:
        Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
        Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
        Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
-        Write-Host "##vso[task.setvariable variable=CMAKE_CUDA_COMPILER_LAUNCHER;]$(Build.SourcesDirectory)/tmp_bin/randomtemp.exe;$(Build.SourcesDirectory)/tmp_bin/sccache.exe"
+        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
+        Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
+        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
      displayName: Set MKL, sccache and randomtemp environment variables

    # View current environment variables
--- a/.bazelrc
+++ b/.bazelrc
@ -1,11 +1,6 @@
 build --copt=--std=c++14
 build --copt=-I.
-# Bazel does not support including its cc_library targets as system
-# headers. We work around this for generated code
-# (e.g. c10/macros/cmake_macros.h) by making the generated directory a
-# system include path.
 build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
-build --experimental_ui_max_stdouterr_bytes=2048576

 # Configuration to disable tty features for environments like CI
 build:no-tty --curses no
@ -16,11 +11,3 @@ build:no-tty --show_progress_rate_limit 10
 build:gpu --define=cuda=true
 # define a separate build folder for faster switching between configs
 build:gpu --platform_suffix=-gpu
-# See the note on the config-less build for details about why we are
-# doing this. We must also do it for the "-gpu" platform suffix.
-build --copt=-isystem --copt=bazel-out/k8-fastbuild-gpu/bin
-# rules_cuda configuration
-build:gpu --@rules_cuda//cuda:enable_cuda
-build:gpu --@rules_cuda//cuda:cuda_targets=sm_52
-build:gpu --@rules_cuda//cuda:compiler=nvcc
-build:gpu --repo_env=CUDA_PATH=/usr/local/cuda
--- a/.circleci/cimodel/data/binary_build_data.py
+++ b/.circleci/cimodel/data/binary_build_data.py
@ -30,7 +30,21 @@ def get_processor_arch_name(gpu_version):
        "cu" + gpu_version.strip("cuda") if gpu_version.startswith("cuda") else gpu_version
    )

+LINUX_PACKAGE_VARIANTS = OrderedDict(
+    manywheel=[
+        "3.6m",
+        "3.7m",
+        "3.8m",
+        "3.9m"
+    ],
+    conda=dimensions.STANDARD_PYTHON_VERSIONS,
+    libtorch=[
+        "3.7m",
+    ],
+)
+
 CONFIG_TREE_DATA = OrderedDict(
+    linux=(dimensions.GPU_VERSIONS, LINUX_PACKAGE_VARIANTS),
    macos=([None], OrderedDict(
        wheel=dimensions.STANDARD_PYTHON_VERSIONS,
        conda=dimensions.STANDARD_PYTHON_VERSIONS,
@ -42,19 +56,20 @@ CONFIG_TREE_DATA = OrderedDict(
        wheel=[
            "3.8",
            "3.9",
-            "3.10",
        ],
        conda=[
            "3.8",
            "3.9",
-            "3.10",
        ],
    )),
    windows=(
-        # Stop building Win+CU102, see https://github.com/pytorch/pytorch/issues/65648
-        [v for v in dimensions.GPU_VERSIONS if v not in dimensions.ROCM_VERSION_LABELS and v != "cuda102"],
+        [v for v in dimensions.GPU_VERSIONS if v not in dimensions.ROCM_VERSION_LABELS],
        OrderedDict(
+            wheel=dimensions.STANDARD_PYTHON_VERSIONS,
            conda=dimensions.STANDARD_PYTHON_VERSIONS,
+            libtorch=[
+                "3.7",
+            ],
        )
    ),
 )
--- a/.circleci/cimodel/data/dimensions.py
+++ b/.circleci/cimodel/data/dimensions.py
@ -4,12 +4,12 @@ CUDA_VERSIONS = [
    "102",
    "111",
    "113",
-    "115",
 ]

 ROCM_VERSIONS = [
-    "4.3.1",
-    "4.5.2",
+    "4.0.1",
+    "4.1",
+    "4.2",
 ]

 ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
@ -17,8 +17,8 @@ ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
 GPU_VERSIONS = [None] + ["cuda" + v for v in CUDA_VERSIONS] + ROCM_VERSION_LABELS

 STANDARD_PYTHON_VERSIONS = [
+    "3.6",
    "3.7",
    "3.8",
-    "3.9",
-    "3.10"
+    "3.9"
 ]
--- a/.circleci/cimodel/data/pytorch_build_data.py
+++ b/.circleci/cimodel/data/pytorch_build_data.py
@ -1,7 +1,70 @@
-from cimodel.lib.conf_tree import ConfigNode
+from cimodel.lib.conf_tree import ConfigNode, X, XImportant


 CONFIG_TREE_DATA = [
+    ("xenial", [
+        ("gcc", [
+            ("5.4", [  # All this subtree rebases to master and then build
+                ("3.6", [
+                    ("important", [X(True)]),
+                ]),
+            ]),
+            # TODO: bring back libtorch test
+            ("7", [X("3.6")]),
+        ]),
+        ("clang", [
+            ("7", [
+                ("3.6", [
+                    ("asan", [
+                        (True, [
+                            ("shard_test", [XImportant(True)]),
+                        ]),
+                    ]),
+                    ("onnx", [XImportant(True)]),
+                ]),
+            ]),
+        ]),
+        ("cuda", [
+            ("10.2", [
+                ("3.6", [
+                    # Build are needed for slow_gradcheck
+                    ('build_only', [X(True)]),
+                    ("slow_gradcheck", [
+                        # If you update this slow gradcheck, you should
+                        # also update docker_definitions.py to make sure
+                        # the docker image match the config used here
+                        (True, [
+                            ('shard_test', [XImportant(True)]),
+                        ]),
+                    ]),
+                    # UNCOMMENT THE BELOW TO REENABLE LIBTORCH
+                    # ("libtorch", [
+                    #     (True, [
+                    #         ('build_only', [X(True)]),
+                    #     ]),
+                    # ]),
+                ]),
+            ]),
+        ]),
+    ]),
+    ("bionic", [
+        ("clang", [
+            ("9", [
+                ("3.6", [
+                    ("xla", [XImportant(True)]),
+                    ("vulkan", [XImportant(True)]),
+                ]),
+            ]),
+        ]),
+        # @jithunnair-amd believes Jenkins builds are sufficient
+        # ("rocm", [
+        #     ("3.9", [
+        #         ("3.6", [
+        #             ('build_only', [XImportant(True)]),
+        #         ]),
+        #     ]),
+        # ]),
+    ]),
 ]


@ -82,6 +145,7 @@ class ExperimentalFeatureConfigNode(TreeConfigNode):
            "build_only": BuildOnlyConfigNode,
            "shard_test": ShardTestConfigNode,
            "cuda_gcc_override": CudaGccOverrideConfigNode,
+            "coverage": CoverageConfigNode,
            "pure_torch": PureTorchConfigNode,
            "slow_gradcheck": SlowGradcheckConfigNode,
        }
@ -225,6 +289,14 @@ class ShardTestConfigNode(TreeConfigNode):
        return ImportantConfigNode


+class CoverageConfigNode(TreeConfigNode):
+    def init2(self, node_name):
+        self.props["is_coverage"] = node_name
+
+    def child_constructor(self):
+        return ExperimentalFeatureConfigNode
+
+
 class ImportantConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "IMPORTANT=" + str(label)
--- a/.circleci/cimodel/data/pytorch_build_definitions.py
+++ b/.circleci/cimodel/data/pytorch_build_definitions.py
@ -239,6 +239,7 @@ def instantiate_configs(only_slow_gradcheck):
        compiler_version = fc.find_prop("compiler_version")
        is_xla = fc.find_prop("is_xla") or False
        is_asan = fc.find_prop("is_asan") or False
+        is_coverage = fc.find_prop("is_coverage") or False
        is_noarch = fc.find_prop("is_noarch") or False
        is_onnx = fc.find_prop("is_onnx") or False
        is_pure_torch = fc.find_prop("is_pure_torch") or False
@ -283,6 +284,10 @@ def instantiate_configs(only_slow_gradcheck):
            python_version = fc.find_prop("pyver")
            parms_list[0] = fc.find_prop("abbreviated_pyver")

+        if is_coverage:
+            parms_list_ignored_for_docker_image.append("coverage")
+            python_version = fc.find_prop("pyver")
+
        if is_noarch:
            parms_list_ignored_for_docker_image.append("noarch")

@ -352,6 +357,28 @@ def instantiate_configs(only_slow_gradcheck):
                                        tags_list=RC_PATTERN)
            c.dependent_tests = gen_docs_configs(c)

+        if (
+            compiler_name != "clang"
+            and not rocm_version
+            and not is_libtorch
+            and not is_vulkan
+            and not is_pure_torch
+            and not is_noarch
+            and not is_slow_gradcheck
+            and not only_slow_gradcheck
+            and not build_only
+        ):
+            distributed_test = Conf(
+                c.gen_build_name("") + "distributed",
+                [],
+                is_xla=False,
+                restrict_phases=["test"],
+                is_libtorch=False,
+                is_important=True,
+                parent_build=c,
+            )
+            c.dependent_tests.append(distributed_test)
+
        config_list.append(c)

    return config_list
--- a/.circleci/cimodel/data/simple/android_definitions.py
+++ b/.circleci/cimodel/data/simple/android_definitions.py
@ -2,6 +2,7 @@ import cimodel.data.simple.util.branch_filters as branch_filters
 from cimodel.data.simple.util.docker_constants import (
    DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK
 )
+import cimodel.lib.miniutils as miniutils


 class AndroidJob:
@ -89,6 +90,21 @@ WORKFLOW_DATA = [
        ["pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_32_build"],
        is_master_only=False,
        is_pr_only=True),
+    AndroidGradleJob(
+        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
+        "pytorch_android_gradle_custom_build_single",
+        [DOCKER_REQUIREMENT_NDK],
+        is_master_only=False,
+        is_pr_only=True),
+    AndroidGradleJob(
+        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
+        "pytorch_android_gradle_custom_build_single",
+        [DOCKER_REQUIREMENT_NDK],
+        is_master_only=False,
+        is_pr_only=True,
+        extra_props=tuple({
+            "lite_interpreter": miniutils.quote(str(int(False)))
+        }.items())),
    AndroidGradleJob(
        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build",
        "pytorch_android_gradle_build",
--- a/.circleci/cimodel/data/simple/bazel_definitions.py
+++ b/.circleci/cimodel/data/simple/bazel_definitions.py
@ -0,0 +1,69 @@
+from cimodel.data.simple.util.docker_constants import (
+    DOCKER_IMAGE_GCC7,
+    DOCKER_REQUIREMENT_GCC7
+)
+
+
+def gen_job_name(phase):
+    job_name_parts = [
+        "pytorch",
+        "bazel",
+        phase,
+    ]
+
+    return "_".join(job_name_parts)
+
+
+class BazelJob:
+    def __init__(self, phase, extra_props=None):
+        self.phase = phase
+        self.extra_props = extra_props or {}
+
+    def gen_tree(self):
+
+        template_parts = [
+            "pytorch",
+            "linux",
+            "bazel",
+            self.phase,
+        ]
+
+        build_env_parts = [
+            "pytorch",
+            "linux",
+            "xenial",
+            "py3.6",
+            "gcc7",
+            "bazel",
+            self.phase,
+        ]
+
+        full_job_name = gen_job_name(self.phase)
+        build_env_name = "-".join(build_env_parts)
+
+        extra_requires = (
+            [gen_job_name("build")] if self.phase == "test" else
+            [DOCKER_REQUIREMENT_GCC7]
+        )
+
+        props_dict = {
+            "build_environment": build_env_name,
+            "docker_image": DOCKER_IMAGE_GCC7,
+            "name": full_job_name,
+            "requires": extra_requires,
+        }
+
+        props_dict.update(self.extra_props)
+
+        template_name = "_".join(template_parts)
+        return [{template_name: props_dict}]
+
+
+WORKFLOW_DATA = [
+    BazelJob("build", {"resource_class": "large"}),
+    BazelJob("test"),
+]
+
+
+def get_workflow_jobs():
+    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/binary_smoketest.py
+++ b/.circleci/cimodel/data/simple/binary_smoketest.py
@ -120,9 +120,9 @@ WORKFLOW_DATA = [
    ),
    SmoketestJob(
        "binary_windows_build",
-        ["wheel", "3.7", "cu113"],
+        ["wheel", "3.7", "cu102"],
        None,
-        "binary_windows_wheel_3_7_cu113_build",
+        "binary_windows_wheel_3_7_cu102_build",
        is_master_only=True,
    ),

@ -144,11 +144,11 @@ WORKFLOW_DATA = [
    ),
    SmoketestJob(
        "binary_windows_test",
-        ["wheel", "3.7", "cu113"],
+        ["wheel", "3.7", "cu102"],
        None,
-        "binary_windows_wheel_3_7_cu113_test",
+        "binary_windows_wheel_3_7_cu102_test",
        is_master_only=True,
-        requires=["binary_windows_wheel_3_7_cu113_build"],
+        requires=["binary_windows_wheel_3_7_cu102_build"],
        extra_props={
            "executor": "windows-with-nvidia-gpu",
        },
--- a/.circleci/cimodel/data/simple/docker_definitions.py
+++ b/.circleci/cimodel/data/simple/docker_definitions.py
@ -4,8 +4,27 @@ from cimodel.lib.miniutils import quote
 from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN


-# NOTE: All hardcoded docker image builds have been migrated to GHA
+# TODO: make this generated from a matrix rather than just a static list
 IMAGE_NAMES = [
+    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
+    "pytorch-linux-bionic-py3.6-clang9",
+    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
+    "pytorch-linux-bionic-py3.8-gcc9",
+    "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
+    "pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
+    "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
+    "pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
+    "pytorch-linux-xenial-py3-clang5-asan",
+    "pytorch-linux-xenial-py3-clang7-asan",
+    "pytorch-linux-xenial-py3-clang7-onnx",
+    "pytorch-linux-xenial-py3.8",
+    "pytorch-linux-xenial-py3.6-clang7",
+    "pytorch-linux-xenial-py3.6-gcc5.4",  # this one is used in doc builds
+    "pytorch-linux-xenial-py3.6-gcc7.2",
+    "pytorch-linux-xenial-py3.6-gcc7",
+    "pytorch-linux-bionic-rocm4.1-py3.6",
+    "pytorch-linux-bionic-rocm4.2-py3.6",
+    "pytorch-linux-bionic-rocm4.3.1-py3.6",
 ]

 # This entry should be an element from the list above
@ -13,12 +32,10 @@ IMAGE_NAMES = [
 # pytorch_build_data.py
 SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"

-def get_workflow_jobs(images=IMAGE_NAMES, only_slow_gradcheck=False):
+def get_workflow_jobs(only_slow_gradcheck=False):
    """Generates a list of docker image build definitions"""
    ret = []
-    for image_name in images:
-        if image_name.startswith('docker-'):
-            image_name = image_name.lstrip('docker-')
+    for image_name in IMAGE_NAMES:
        if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
            continue

--- a/.circleci/cimodel/data/simple/ios_definitions.py
+++ b/.circleci/cimodel/data/simple/ios_definitions.py
@ -75,12 +75,6 @@ WORKFLOW_DATA = [
    IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom"), extra_props={
        "op_list": "mobilenetv2.yaml",
        "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("x86_64", "coreml"), is_org_member_context=False, extra_props={
-        "use_coreml": miniutils.quote(str(int(True))),
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("arm64", "coreml"), extra_props={
-        "use_coreml": miniutils.quote(str(int(True))),
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
 ]


--- a/.circleci/cimodel/data/simple/mobile_definitions.py
+++ b/.circleci/cimodel/data/simple/mobile_definitions.py
@ -4,6 +4,12 @@ PyTorch Mobile PR builds (use linux host toolchain + mobile build options)

 import cimodel.lib.miniutils as miniutils
 import cimodel.data.simple.util.branch_filters
+from cimodel.data.simple.util.docker_constants import (
+    DOCKER_IMAGE_ASAN,
+    DOCKER_REQUIREMENT_ASAN,
+    DOCKER_IMAGE_NDK,
+    DOCKER_REQUIREMENT_NDK
+)


 class MobileJob:
@ -46,6 +52,33 @@ class MobileJob:


 WORKFLOW_DATA = [
+    MobileJob(
+        DOCKER_IMAGE_ASAN,
+        [DOCKER_REQUIREMENT_ASAN],
+        ["build"]
+    ),
+
+    # Use LLVM-DEV toolchain in android-ndk-r19c docker image
+    MobileJob(
+        DOCKER_IMAGE_NDK,
+        [DOCKER_REQUIREMENT_NDK],
+        ["custom", "build", "dynamic"]
+    ),
+
+    MobileJob(
+        DOCKER_IMAGE_NDK,
+        [DOCKER_REQUIREMENT_NDK],
+        ["custom", "build", "static"]
+    ),
+
+    # Use LLVM-DEV toolchain in android-ndk-r19c docker image
+    # Most of this CI is already covered by "mobile-custom-build-dynamic" job
+    MobileJob(
+        DOCKER_IMAGE_NDK,
+        [DOCKER_REQUIREMENT_NDK],
+        ["code", "analysis"],
+        True
+    ),
 ]


--- a/.circleci/cimodel/data/simple/nightly_ios.py
+++ b/.circleci/cimodel/data/simple/nightly_ios.py
@ -5,11 +5,9 @@ import cimodel.lib.miniutils as miniutils
 class IOSNightlyJob:
    def __init__(self,
                 variant,
-                 is_full_jit=False,
                 is_upload=False):

        self.variant = variant
-        self.is_full_jit = is_full_jit
        self.is_upload = is_upload

    def get_phase_name(self):
@ -19,11 +17,8 @@ class IOSNightlyJob:

        extra_name_suffix = [self.get_phase_name()] if self.is_upload else []

-        extra_name = ["full_jit"] if self.is_full_jit else []
-
        common_name_pieces = [
            "ios",
-        ] + extra_name + [
        ] + ios_definitions.XCODE_VERSION.render_dots_or_parts(with_version_dots) + [
            "nightly",
            self.variant,
@ -36,8 +31,7 @@ class IOSNightlyJob:
        return "_".join(["pytorch"] + self.get_common_name_pieces(False))

    def gen_tree(self):
-        build_configs = BUILD_CONFIGS_FULL_JIT if self.is_full_jit else BUILD_CONFIGS
-        extra_requires = [x.gen_job_name() for x in build_configs] if self.is_upload else []
+        extra_requires = [x.gen_job_name() for x in BUILD_CONFIGS] if self.is_upload else []

        props_dict = {
            "build_environment": "-".join(["libtorch"] + self.get_common_name_pieces(True)),
@ -53,9 +47,6 @@ class IOSNightlyJob:
            props_dict["use_metal"] = miniutils.quote(str(int(True)))
            props_dict["use_coreml"] = miniutils.quote(str(int(True)))

-        if self.is_full_jit:
-            props_dict["lite_interpreter"] = miniutils.quote(str(int(False)))
-
        template_name = "_".join([
            "binary",
            "ios",
@ -70,14 +61,9 @@ BUILD_CONFIGS = [
    IOSNightlyJob("arm64"),
 ]

-BUILD_CONFIGS_FULL_JIT = [
-    IOSNightlyJob("x86_64", is_full_jit=True),
-    IOSNightlyJob("arm64", is_full_jit=True),
-]

-WORKFLOW_DATA = BUILD_CONFIGS + BUILD_CONFIGS_FULL_JIT + [
-    IOSNightlyJob("binary", is_full_jit=False, is_upload=True),
-    IOSNightlyJob("binary", is_full_jit=True, is_upload=True),
+WORKFLOW_DATA = BUILD_CONFIGS + [
+    IOSNightlyJob("binary", is_upload=True),
 ]


--- a/.circleci/cimodel/data/simple/util/docker_constants.py
+++ b/.circleci/cimodel/data/simple/util/docker_constants.py
@ -11,7 +11,7 @@ def gen_docker_image_requires(image_name):


 DOCKER_IMAGE_BASIC, DOCKER_REQUIREMENT_BASE = gen_docker_image(
-    "pytorch-linux-xenial-py3.7-gcc5.4"
+    "pytorch-linux-xenial-py3.6-gcc5.4"
 )

 DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
@ -19,7 +19,7 @@ DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
 )

 DOCKER_IMAGE_GCC7, DOCKER_REQUIREMENT_GCC7 = gen_docker_image(
-    "pytorch-linux-xenial-py3.7-gcc7"
+    "pytorch-linux-xenial-py3.6-gcc7"
 )


--- a/.circleci/cimodel/data/windows_build_definitions.py
+++ b/.circleci/cimodel/data/windows_build_definitions.py
@ -0,0 +1,160 @@
+import cimodel.lib.miniutils as miniutils
+from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN, NON_PR_BRANCH_LIST
+from cimodel.data.simple.util.versions import CudaVersion
+
+
+class WindowsJob:
+    def __init__(
+        self,
+        test_index,
+        vscode_spec,
+        cuda_version,
+        force_on_cpu=False,
+        multi_gpu=False,
+        master_only=False,
+        nightly_only=False,
+        master_and_nightly=False
+    ):
+        self.test_index = test_index
+        self.vscode_spec = vscode_spec
+        self.cuda_version = cuda_version
+        self.force_on_cpu = force_on_cpu
+        self.multi_gpu = multi_gpu
+        self.master_only = master_only
+        self.nightly_only = nightly_only
+        self.master_and_nightly = master_and_nightly
+
+    def gen_tree(self):
+
+        base_phase = "build" if self.test_index is None else "test"
+        numbered_phase = (
+            base_phase if self.test_index is None else base_phase + str(self.test_index)
+        )
+
+        key_parts = ["pytorch", "windows", base_phase]
+        if self.multi_gpu:
+            key_parts.append('multigpu')
+        key_name = "_".join(key_parts)
+
+        cpu_forcing_name_parts = ["on", "cpu"] if self.force_on_cpu else []
+
+        target_arch = self.cuda_version.render_dots() if self.cuda_version else "cpu"
+
+        python_version = "3.8"
+
+        base_name_parts = [
+            "pytorch",
+            "windows",
+            self.vscode_spec.render(),
+            "py" + python_version.replace(".", ""),
+            target_arch,
+        ]
+
+        prerequisite_jobs = []
+        if base_phase == "test":
+            prerequisite_jobs.append("_".join(base_name_parts + ["build"]))
+
+        if self.cuda_version:
+            self.cudnn_version = 8 if self.cuda_version.major == 11 else 7
+
+        arch_env_elements = (
+            ["cuda" + str(self.cuda_version.major) + "." + str(self.cuda_version.minor)]
+            if self.cuda_version
+            else ["cpu"]
+        )
+
+        build_environment_string = "-".join(
+            ["pytorch", "win"]
+            + self.vscode_spec.get_elements()
+            + arch_env_elements
+            + ["py" + python_version.split(".")[0]]
+        )
+
+        is_running_on_cuda = bool(self.cuda_version) and not self.force_on_cpu
+
+        if self.multi_gpu:
+            props_dict = {"requires": prerequisite_jobs}
+        else:
+            props_dict = {
+                "build_environment": build_environment_string,
+                "python_version": miniutils.quote(python_version),
+                "vs_version": miniutils.quote("16.8.6"),
+                "vc_version": miniutils.quote(self.vscode_spec.dotted_version()),
+                "vc_year": miniutils.quote(str(self.vscode_spec.year)),
+                "vc_product": self.vscode_spec.get_product(),
+                "use_cuda": miniutils.quote(str(int(is_running_on_cuda))),
+                "requires": prerequisite_jobs,
+            }
+
+        if self.master_only:
+            props_dict[
+                "filters"
+            ] = gen_filter_dict()
+        elif self.nightly_only:
+            props_dict[
+                "filters"
+            ] = gen_filter_dict(branches_list=["nightly"], tags_list=RC_PATTERN)
+        elif self.master_and_nightly:
+            props_dict[
+                "filters"
+            ] = gen_filter_dict(branches_list=NON_PR_BRANCH_LIST + ["nightly"], tags_list=RC_PATTERN)
+
+        name_parts = base_name_parts + cpu_forcing_name_parts + [numbered_phase]
+
+        if not self.multi_gpu:
+            if base_phase == "test":
+                test_name = "-".join(["pytorch", "windows", numbered_phase])
+                props_dict["test_name"] = test_name
+
+                if is_running_on_cuda:
+                    props_dict["executor"] = "windows-with-nvidia-gpu"
+
+            props_dict["cuda_version"] = (
+                miniutils.quote(str(self.cuda_version))
+                if self.cuda_version
+                else "cpu"
+            )
+
+        props_dict["name"] = "_".join(name_parts)
+
+        return [{key_name: props_dict}]
+
+
+class VcSpec:
+    def __init__(self, year, version_elements=None, hide_version=False):
+        self.year = year
+        self.version_elements = version_elements or []
+        self.hide_version = hide_version
+
+    def get_elements(self):
+        if self.hide_version:
+            return [self.prefixed_year()]
+        return [self.prefixed_year()] + self.version_elements
+
+    def get_product(self):
+        return "BuildTools"
+
+    def dotted_version(self):
+        return ".".join(self.version_elements)
+
+    def prefixed_year(self):
+        return "vs" + str(self.year)
+
+    def render(self):
+        return "_".join(self.get_elements())
+
+_VC2019 = VcSpec(2019)
+
+WORKFLOW_DATA = [
+    # VS2019 CUDA-10.2
+    WindowsJob(None, _VC2019, CudaVersion(10, 2), master_only=True),
+    # VS2019 CUDA-10.2 force on cpu
+    WindowsJob(1, _VC2019, CudaVersion(10, 2), force_on_cpu=True, master_only=True),
+
+    # TODO: This test is disabled due to https://github.com/pytorch/pytorch/issues/59724
+    # WindowsJob('_azure_multi_gpu', _VC2019, CudaVersion(11, 1), multi_gpu=True, master_and_nightly=True),
+]
+
+
+def get_windows_workflows():
+    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
--- a/.circleci/docker/android/build.gradle
+++ b/.circleci/docker/android/build.gradle
@ -51,9 +51,9 @@ android {
 dependencies {
    implementation 'com.android.support:appcompat-v7:28.0.0'
    implementation 'androidx.appcompat:appcompat:1.0.0'
-    implementation 'com.facebook.fbjni:fbjni-java-only:0.2.2'
+    implementation 'com.facebook.fbjni:fbjni-java-only:0.0.3'
    implementation 'com.google.code.findbugs:jsr305:3.0.1'
-    implementation 'com.facebook.soloader:nativeloader:0.10.1'
+    implementation 'com.facebook.soloader:nativeloader:0.8.0'

    implementation 'junit:junit:' + rootProject.junitVersion
    implementation 'androidx.test:core:' + rootProject.coreVersion
--- a/.circleci/docker/build.sh
+++ b/.circleci/docker/build.sh
@ -40,12 +40,6 @@ function extract_all_from_image_name() {
  done
 }

-# Use the same pre-built XLA test image from PyTorch/XLA
-if [[ "$image" == *xla* ]]; then
-  echo "Using pre-built XLA test image..."
-  exit 0
-fi
-
 if [[ "$image" == *-xenial* ]]; then
  UBUNTU_VERSION=16.04
 elif [[ "$image" == *-artful* ]]; then
@ -88,8 +82,8 @@ case "$image" in
    GCC_VERSION=7
    # Do not install PROTOBUF, DB, and VISION as a test
    ;;
-  pytorch-linux-xenial-py3.7-gcc5.4)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-gcc5.4)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=5
    PROTOBUF=yes
@ -97,14 +91,14 @@ case "$image" in
    VISION=yes
    KATEX=yes
    ;;
-  pytorch-linux-xenial-py3.7-gcc7.2)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-gcc7.2)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    # Do not install PROTOBUF, DB, and VISION as a test
    ;;
-  pytorch-linux-xenial-py3.7-gcc7)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-gcc7)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -114,7 +108,7 @@ case "$image" in
  pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7)
    CUDA_VERSION=10.2
    CUDNN_VERSION=7
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -125,7 +119,7 @@ case "$image" in
  pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7)
    CUDA_VERSION=11.1
    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -136,19 +130,7 @@ case "$image" in
  pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7)
    CUDA_VERSION=11.3.0 # Deviating from major.minor to conform to nvidia's Docker image names
    CUDNN_VERSION=8
-    TENSORRT_VERSION=8.0.1.6
-    ANACONDA_PYTHON_VERSION=3.7
-    CMAKE_VERSION=3.10.3
-    GCC_VERSION=7
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    ;;
-  pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7)
-    CUDA_VERSION=11.5.0
-    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -157,15 +139,15 @@ case "$image" in
    KATEX=yes
    ;;
  pytorch-linux-xenial-py3-clang5-asan)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=5.0
-    CMAKE_VERSION=3.13.5
+    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ;;
  pytorch-linux-xenial-py3-clang7-asan)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=7
    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
@ -173,7 +155,7 @@ case "$image" in
    VISION=yes
    ;;
  pytorch-linux-xenial-py3-clang7-onnx)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=7
    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
@ -181,9 +163,9 @@ case "$image" in
    VISION=yes
    ;;
  pytorch-linux-xenial-py3-clang5-android-ndk-r19c)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=5.0
-    CMAKE_VERSION=3.13.5
+    CMAKE_VERSION=3.10.3
    LLVMDEV=yes
    PROTOBUF=yes
    ANDROID=yes
@ -191,16 +173,16 @@ case "$image" in
    GRADLE_VERSION=6.8.3
    NINJA_VERSION=1.9.0
    ;;
-  pytorch-linux-xenial-py3.7-clang7)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-clang7)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    CLANG_VERSION=7
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ;;
-  pytorch-linux-bionic-py3.7-clang9)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-bionic-py3.6-clang9)
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=9
    PROTOBUF=yes
    DB=yes
@ -215,10 +197,10 @@ case "$image" in
    DB=yes
    VISION=yes
    ;;
-  pytorch-linux-bionic-cuda10.2-cudnn7-py3.7-clang9)
+  pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9)
    CUDA_VERSION=10.2
    CUDNN_VERSION=7
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=9
    PROTOBUF=yes
    DB=yes
@ -233,32 +215,40 @@ case "$image" in
    DB=yes
    VISION=yes
    ;;
-  pytorch-linux-bionic-cuda11.0-cudnn8-py3.7-gcc9)
+  pytorch-linux-bionic-cuda11.0-cudnn8-py3.6-gcc9)
    CUDA_VERSION=11.0
    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=3.9
    ;;
-  pytorch-linux-bionic-rocm4.3.1-py3.7)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-bionic-rocm4.1-py3.6)
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ROCM_VERSION=4.1
+    ;;
+  pytorch-linux-bionic-rocm4.2-py3.6)
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ROCM_VERSION=4.2
+    ;;
+  pytorch-linux-bionic-rocm4.3.1-py3.6)
+    ANACONDA_PYTHON_VERSION=3.6
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=4.3.1
    ;;
-  pytorch-linux-bionic-rocm4.5-py3.7)
-    ANACONDA_PYTHON_VERSION=3.7
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    ROCM_VERSION=4.5.2
-    ;;
  *)
    # Catch-all for builds that are not hardcoded.
    PROTOBUF=yes
@ -304,7 +294,6 @@ fi

 tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

-
 # Build image
 # TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
 # it's no longer needed.
@ -331,7 +320,6 @@ docker build \
       --build-arg "GCC_VERSION=${GCC_VERSION}" \
       --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
       --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
-       --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
       --build-arg "ANDROID=${ANDROID}" \
       --build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
       --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
@ -341,7 +329,6 @@ docker build \
       --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
       --build-arg "KATEX=${KATEX:-}" \
       --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
-       --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx900;gfx906}" \
       -f $(dirname ${DOCKERFILE})/Dockerfile \
       -t "$tmp_tag" \
       "$@" \
@ -360,7 +347,6 @@ function drun() {
 }

 if [[ "$OS" == "ubuntu" ]]; then
-
  if !(drun lsb_release -a 2>&1 | grep -qF Ubuntu); then
    echo "OS=ubuntu, but:"
    drun lsb_release -a
--- a/.circleci/docker/build_docker.sh
+++ b/.circleci/docker/build_docker.sh
@ -26,14 +26,11 @@ login() {
    docker login -u AWS --password-stdin "$1"
 }

+# Retry on timeouts (can happen on job stampede).
+retry login "${registry}"

-# Only run these steps if not on github actions
-if [[ -z "${GITHUB_ACTIONS}" ]]; then
-  # Retry on timeouts (can happen on job stampede).
-  retry login "${registry}"
-  # Logout on exit
-  trap "docker logout ${registry}" EXIT
-fi
+# Logout on exit
+trap "docker logout ${registry}" EXIT

 # export EC2=1
 # export JENKINS=1
@ -48,8 +45,8 @@ fi

 docker push "${image}:${tag}"

+docker save -o "${IMAGE_NAME}:${tag}.tar" "${image}:${tag}"
+
 if [ -z "${DOCKER_SKIP_S3_UPLOAD:-}" ]; then
-  trap "rm -rf ${IMAGE_NAME}:${tag}.tar" EXIT
-  docker save -o "${IMAGE_NAME}:${tag}.tar" "${image}:${tag}"
  aws s3 cp "${IMAGE_NAME}:${tag}.tar" "s3://ossci-linux-build/pytorch/base/${IMAGE_NAME}:${tag}.tar" --acl public-read
 fi
--- a/.circleci/docker/centos-rocm/Dockerfile
+++ b/.circleci/docker/centos-rocm/Dockerfile
@ -4,10 +4,6 @@ FROM centos:${CENTOS_VERSION}

 ARG CENTOS_VERSION

-# Set AMD gpu targets to build for
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-
 # Install required packages to build Caffe2

 # Install common dependencies (so that this step can be cached separately)
@ -15,12 +11,6 @@ ARG EC2
 ADD ./common/install_base.sh install_base.sh
 RUN bash ./install_base.sh && rm install_base.sh

-# Update CentOS git version
-RUN yum -y remove git
-RUN yum -y remove git-*
-RUN yum -y install https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm
-RUN yum install -y git
-
 # Install devtoolset
 ARG DEVTOOLSET_VERSION
 ADD ./common/install_devtoolset.sh install_devtoolset.sh
@ -37,7 +27,7 @@ RUN rm install_glibc.sh
 ADD ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
--- a/.circleci/docker/common/install_base.sh
+++ b/.circleci/docker/common/install_base.sh
@ -11,13 +11,8 @@ install_ubuntu() {
  #   "$UBUNTU_VERSION" == "18.04"
  if [[ "$UBUNTU_VERSION" == "18.04"* ]]; then
    cmake3="cmake=3.10*"
-    maybe_libiomp_dev="libiomp-dev"
-  elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
-    cmake3="cmake=3.16*"
-    maybe_libiomp_dev=""
  else
    cmake3="cmake=3.5*"
-    maybe_libiomp_dev="libiomp-dev"
  fi

  # Install common dependencies
@ -38,7 +33,7 @@ install_ubuntu() {
    git \
    libatlas-base-dev \
    libc6-dbg \
-    ${maybe_libiomp_dev} \
+    libiomp-dev \
    libyaml-dev \
    libz-dev \
    libjpeg-dev \
@ -49,10 +44,6 @@ install_ubuntu() {
    wget \
    vim

-  # Should resolve issues related to various apt package repository cert issues
-  # see: https://github.com/pytorch/pytorch/issues/65931
-  apt-get install -y libgnutls30
-
  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
@ -118,11 +109,14 @@ esac
 # Install Valgrind separately since the apt-get version is too old.
 mkdir valgrind_build && cd valgrind_build
 VALGRIND_VERSION=3.16.1
-wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
+if ! wget http://valgrind.org/downloads/valgrind-${VALGRIND_VERSION}.tar.bz2
+then
+  wget https://sourceware.org/ftp/valgrind/valgrind-${VALGRIND_VERSION}.tar.bz2
+fi
 tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
 cd valgrind-${VALGRIND_VERSION}
 ./configure --prefix=/usr/local
-make -j6
+make -j 4
 sudo make install
 cd ../../
 rm -rf valgrind_build
--- a/.circleci/docker/common/install_conda.sh
+++ b/.circleci/docker/common/install_conda.sh
@ -13,12 +13,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
      CONDA_FILE="Miniconda2-latest-Linux-x86_64.sh"
    ;;
    3)
-      if [ "$ANACONDA_PYTHON_VERSION" = "3.6" ]; then
-        # Latest release of Conda that still supports python-3.6
-        CONDA_FILE="Miniconda3-py37_4.10.3-Linux-x86_64.sh"
-      else
-        CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
-      fi
+      CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
    ;;
    *)
      echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
@ -61,9 +56,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  pushd /opt/conda

  # Track latest conda update
-  if [ "$ANACONDA_PYTHON_VERSION" != "3.6" ]; then
-    as_jenkins conda update -y -n base conda
-  fi
+  as_jenkins conda update -y -n base conda

  # Install correct Python version
  as_jenkins conda install -y python="$ANACONDA_PYTHON_VERSION"
@ -93,10 +86,14 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
    conda_install numpy=1.18.5 astunparse pyyaml mkl mkl-include setuptools cffi future six dataclasses typing_extensions
  fi

-  # Magma package names are concatenation of CUDA major and minor ignoring revision
-  # I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
-  if [ -n "$CUDA_VERSION" ]; then
-    conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch
+  if [[ "$CUDA_VERSION" == 10.2* ]]; then
+    conda_install magma-cuda102 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.0* ]]; then
+    conda_install magma-cuda110 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.1* ]]; then
+    conda_install magma-cuda111 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.3* ]]; then
+    conda_install magma-cuda113 -c pytorch
  fi

  # TODO: This isn't working atm
@ -106,12 +103,14 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  # TODO: Why is scipy pinned
  # Pin MyPy version because new errors are likely to appear with each release
  # Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
+  # Pin coverage so we can use COVERAGE_RCFILE
  as_jenkins pip install --progress-bar off pytest \
    scipy==$SCIPY_VERSION \
    scikit-image \
    psutil \
    unittest-xml-reporting \
    boto3==1.16.34 \
+    coverage==5.5 \
    hypothesis==4.53.2 \
    expecttest==0.1.3 \
    mypy==0.812 \
@ -120,9 +119,9 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  # Install numba only on python-3.8 or below
  # For numba issue see https://github.com/pytorch/pytorch/issues/51511
  if [[ $(python -c "import sys; print(int(sys.version_info < (3, 9)))") == "1" ]]; then
-    as_jenkins pip install --progress-bar off numba==0.54.1 "librosa>=0.6.2,<0.9.0"
+    # Quote the requirement: an unquoted ">=0.6.2" is parsed by the shell as an
+    # output redirection to a file named "=0.6.2", which drops the version bound.
+    as_jenkins pip install --progress-bar off numba "librosa>=0.6.2"
  else
-    as_jenkins pip install --progress-bar off numba==0.49.0 "librosa>=0.6.2,<0.9.0"
+    as_jenkins pip install --progress-bar off numba==0.49.0 "librosa>=0.6.2"
  fi

  # Update scikit-learn to a python-3.8 compatible version
--- a/.circleci/docker/common/install_gcc.sh
+++ b/.circleci/docker/common/install_gcc.sh
@ -7,18 +7,15 @@ if [ -n "$GCC_VERSION" ]; then
  # Need the official toolchain repo to get alternate packages
  add-apt-repository ppa:ubuntu-toolchain-r/test
  apt-get update
-  if [[ "$UBUNTU_VERSION" == "16.04" && "${GCC_VERSION:0:1}" == "5" ]]; then
+  if [ "$UBUNTU_VERSION" = "16.04" -a "$GCC_VERSION" = "5" ]; then
    apt-get install -y g++-5=5.4.0-6ubuntu1~16.04.12
-    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 50
-    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 50
-    update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-5 50
  else
    apt-get install -y g++-$GCC_VERSION
-    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
-    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
-    update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50
  fi

+  update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
+  update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
+  update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50

  # Cleanup package manager
  apt-get autoclean && apt-get clean
--- a/.circleci/docker/common/install_openssl.sh
+++ b/.circleci/docker/common/install_openssl.sh
@ -4,11 +4,11 @@ set -ex

 OPENSSL=openssl-1.1.1k

-wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
+wget -q -O "${OPENSSL}.tar.gz" "https://www.openssl.org/source/${OPENSSL}.tar.gz"
 tar xf "${OPENSSL}.tar.gz"
 cd "${OPENSSL}"
 ./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
-# NOTE: openssl install errors out when built with the -j option
-make -j6; make install_sw
+# NOTE: openssl errors out when built with the -j option
+make install_sw
 cd ..
 rm -rf "${OPENSSL}"
--- a/.circleci/docker/common/install_protobuf.sh
+++ b/.circleci/docker/common/install_protobuf.sh
@ -14,9 +14,9 @@ install_protobuf_317() {

  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz"
  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
-  # -j6 to balance memory usage and speed.
+  # -j2 to balance memory usage and speed.
  # naked `-j` seems to use too much memory.
-  pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
+  pushd "$pb_dir" && ./configure && make -j2 && make -j2 check && sudo make -j2 install && sudo ldconfig
  popd
  rm -rf $pb_dir
 }
--- a/.circleci/docker/common/install_rocm.sh
+++ b/.circleci/docker/common/install_rocm.sh
@ -4,27 +4,22 @@ set -ex

 install_magma() {
    # "install" hipMAGMA into /opt/rocm/magma by copying after build
-    git clone https://bitbucket.org/icl/magma.git
+    git clone https://bitbucket.org/icl/magma.git -b magma_ctrl_launch_bounds
    pushd magma
-    # fix for magma_queue memory leak issue
-    git checkout c62d700d880c7283b33fb1d615d62fc9c7f7ca21
+    # The "magma_ctrl_launch_bounds" branch carries a fix on top of the commit below; the checkout line is kept for reference.
+    #git checkout 878b1ce02e9cfe4a829be22c8f911e9c0b6bd88f
+    # Work around non-ASCII characters in certain magma sources; remove this after upstream magma fixes this.
+    perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zfree.cpp
+    perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zsolverinfo.cpp
    cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
    echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
    echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
-    echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
-    export PATH="${PATH}:/opt/rocm/bin"
-    if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
-      amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
-    else
-      amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
-    fi
-    for arch in $amdgpu_targets; do
-      echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
-    done
+    echo 'DEVCCFLAGS += --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 --gpu-max-threads-per-block=256' >> make.inc
    # hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
    sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
+    export PATH="${PATH}:/opt/rocm/bin"
    make -f make.gen.hipMAGMA -j $(nproc)
-    LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda
+    make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda
    make testing/testing_dgemm -j $(nproc) MKLROOT=/opt/conda
    popd
    mv magma /opt/rocm
@ -34,19 +29,12 @@ ver() {
    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
 }

-# Map ROCm version to AMDGPU version
-declare -A AMDGPU_VERSIONS=( ["4.5.2"]="21.40.2" )
-
 install_ubuntu() {
    apt-get update
    if [[ $UBUNTU_VERSION == 18.04 ]]; then
      # gpg-agent is not available by default on 18.04
      apt-get install -y --no-install-recommends gpg-agent
    fi
-    if [[ $UBUNTU_VERSION == 20.04 ]]; then
-      # gpg-agent is not available by default on 20.04
-      apt-get install -y --no-install-recommends gpg-agent
-    fi
    apt-get install -y kmod
    apt-get install -y wget

@ -54,13 +42,6 @@ install_ubuntu() {
    apt-get install -y libc++1
    apt-get install -y libc++abi1

-    if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
-        # Add amdgpu repository
-        UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
-        local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
-        echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
-    fi
-
    ROCM_REPO="ubuntu"
    if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
        ROCM_REPO="xenial"
@ -68,8 +49,7 @@ install_ubuntu() {

    # Add rocm repository
    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
-    local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
-    echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
+    echo "deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
    apt-get update --allow-insecure-repositories

    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
@ -106,24 +86,11 @@ install_centos() {
  yum install -y epel-release
  yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`

-  if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
-      # Add amdgpu repository
-      local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
-      echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
-      echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
-      echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
-      echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
-      echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
-      echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
-  fi
-
-  local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
  echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
  echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
-  echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
+  echo "baseurl=http://repo.radeon.com/rocm/yum/${ROCM_VERSION}" >> /etc/yum.repos.d/rocm.repo
  echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
-  echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
-  echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo
+  echo "gpgcheck=0" >> /etc/yum.repos.d/rocm.repo

  yum update -y

--- a/.circleci/docker/common/install_tensorrt.sh
+++ b/.circleci/docker/common/install_tensorrt.sh
@ -1,7 +0,0 @@
-#!/bin/bash
-
-if [ -n "$TENSORRT_VERSION" ]; then
-    python3 -m pip install --upgrade setuptools pip
-    python3 -m pip install nvidia-pyindex
-    python3 -m pip install nvidia-tensorrt==${TENSORRT_VERSION} --extra-index-url https://pypi.ngc.nvidia.com
-fi
--- a/.circleci/docker/ubuntu-cuda/Dockerfile
+++ b/.circleci/docker/ubuntu-cuda/Dockerfile
@ -24,7 +24,7 @@ ARG KATEX
 ADD ./common/install_katex.sh install_katex.sh
 RUN bash ./install_katex.sh && rm install_katex.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
@ -65,12 +65,6 @@ ADD ./common/install_openssl.sh install_openssl.sh
 ENV OPENSSL_ROOT_DIR /opt/openssl
 RUN bash ./install_openssl.sh

-# (optional) Install TensorRT
-ARG TENSORRT_VERSION
-ADD ./common/install_tensorrt.sh install_tensorrt.sh
-RUN if [ -n "${TENSORRT_VERSION}" ]; then bash ./install_tensorrt.sh; fi
-RUN rm install_tensorrt.sh
-
 # (optional) Install non-default CMake version
 ARG CMAKE_VERSION
 ADD ./common/install_cmake.sh install_cmake.sh
@ -81,7 +75,7 @@ RUN rm install_cmake.sh
 ADD ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
 RUN bash ./install_cache.sh && rm install_cache.sh
-ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
+ENV CUDA_NVCC_EXECUTABLE=/opt/cache/lib/nvcc

 # Add jni.h for java host build
 ADD ./common/install_jni.sh install_jni.sh
@ -100,7 +94,6 @@ ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
 ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
-ENV CUDA_PATH /usr/local/cuda

 # Install LLVM dev version (Defined in the pytorch/builder github repository)
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
--- a/.circleci/docker/ubuntu-rocm/Dockerfile
+++ b/.circleci/docker/ubuntu-rocm/Dockerfile
@ -6,10 +6,6 @@ ARG UBUNTU_VERSION

 ENV DEBIAN_FRONTEND noninteractive

-# Set AMD gpu targets to build for
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-
 # Install common dependencies (so that this step can be cached separately)
 ARG EC2
 ADD ./common/install_base.sh install_base.sh
@ -25,7 +21,7 @@ RUN bash ./install_clang.sh && rm install_clang.sh
 ADD ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
--- a/.circleci/docker/ubuntu/Dockerfile
+++ b/.circleci/docker/ubuntu/Dockerfile
@ -33,7 +33,7 @@ ARG KATEX
 ADD ./common/install_katex.sh install_katex.sh
 RUN bash ./install_katex.sh && rm install_katex.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
--- a/.circleci/ecr_gc_docker/Dockerfile
+++ b/.circleci/ecr_gc_docker/Dockerfile
@ -0,0 +1,13 @@
+FROM ubuntu:18.04
+
+RUN apt-get update && apt-get install -y python3-pip git && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log
+
+ADD requirements.txt /requirements.txt
+
+RUN pip3 install -r /requirements.txt
+
+ADD gc.py /usr/bin/gc.py
+
+ADD docker_hub.py /usr/bin/docker_hub.py
+
+ENTRYPOINT ["/usr/bin/gc.py"]
--- a/.circleci/ecr_gc_docker/docker_hub.py
+++ b/.circleci/ecr_gc_docker/docker_hub.py
@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+
+from collections import namedtuple
+
+import boto3
+import requests
+import os
+
+
+# Immutable record describing one Docker Hub image tag; list_tags() builds
+# these and save_to_s3() renders one table row per record.
+IMAGE_INFO = namedtuple(
+    "IMAGE_INFO", ("repo", "tag", "size", "last_updated_at", "last_updated_by")
+)
+
+
+def build_access_token(username, password):
+    """Log in to Docker Hub and return the auth header for later requests.
+
+    Args:
+        username: Docker Hub account name.
+        password: Docker Hub account password.
+
+    Returns:
+        A dict of the form ``{"Authorization": "JWT <token>"}`` suitable for
+        passing as ``headers=`` to ``requests``.
+
+    Raises:
+        requests.HTTPError: if the login endpoint answers with an error status.
+    """
+    # The credentials must come from the arguments, not from a module-level
+    # global, so the function also works when this module is imported.
+    r = requests.post(
+        "https://hub.docker.com/v2/users/login/",
+        data={"username": username, "password": password},
+    )
+    r.raise_for_status()
+    token = r.json().get("token")
+    return {"Authorization": "JWT " + token}
+
+
+def list_repos(user, token):
+    """Return the sorted ``"<user>/<name>"`` repository names owned by *user*.
+
+    *token* is the header dict sent with the request.  When at least one
+    repository is found, the names are also printed, one per line.
+    """
+    response = requests.get(
+        "https://hub.docker.com/v2/repositories/" + user, headers=token
+    )
+    response.raise_for_status()
+    repo_names = [
+        entry["user"] + "/" + entry["name"]
+        for entry in response.json().get("results", [])
+    ]
+    repo_names.sort()
+    if repo_names:
+        print("repos found:")
+        print("".join("\n\t" + name for name in repo_names))
+    return repo_names
+
+
+def list_tags(repo, token):
+    """Return one ``IMAGE_INFO`` per tag listed for *repo*.
+
+    Only the ``results`` of the single response are read; no further pages
+    are requested.  *token* is the header dict sent with the request.
+    """
+    tags_url = "https://hub.docker.com/v2/repositories/" + repo + "/tags"
+    response = requests.get(tags_url, headers=token)
+    response.raise_for_status()
+    infos = []
+    for entry in response.json().get("results", []):
+        infos.append(
+            IMAGE_INFO(
+                repo=repo,
+                tag=entry["name"],
+                size=entry["full_size"],
+                last_updated_at=entry["last_updated"],
+                last_updated_by=entry["last_updater_username"],
+            )
+        )
+    return infos
+
+
+def save_to_s3(tags):
+    """Render *tags* as an HTML table and upload the page to S3.
+
+    Args:
+        tags: iterable of ``IMAGE_INFO`` records (any object exposing
+            ``repo``, ``tag``, ``size``, ``last_updated_at`` and
+            ``last_updated_by`` attributes).
+
+    The page is stored in the ``docker.pytorch.org`` bucket under the key
+    ``docker_hub.html`` with a ``public-read`` ACL.
+    """
+    client = boto3.client("s3")
+    row_template = (
+        "<tr><td>{repo}</td><td>{tag}</td><td>{size}</td>"
+        "<td>{last_updated_at}</td><td>{last_updated_by}</td></tr>"
+    )
+    # Join once instead of growing a string with repeated "+=".
+    table_content = "".join(
+        row_template.format(
+            repo=t.repo,
+            tag=t.tag,
+            size=t.size,
+            last_updated_at=t.last_updated_at,
+            last_updated_by=t.last_updated_by,
+        )
+        for t in tags
+    )
+    # Doubled braces in the <script> section are str.format escapes for the
+    # literal JavaScript braces.
+    html_body = """
+    <html>
+        <head>
+            <link rel="stylesheet"
+                href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css"
+                integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh"
+                crossorigin="anonymous">
+            <link rel="stylesheet" type="text/css"
+                href="https://cdn.datatables.net/1.10.20/css/jquery.dataTables.css">
+            <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js">
+            </script>
+            <script type="text/javascript" charset="utf8"
+                src="https://cdn.datatables.net/1.10.20/js/jquery.dataTables.js"></script>
+            <title> docker image info</title>
+        </head>
+        <body>
+            <table class="table table-striped table-hover" id="docker">
+            <caption>Docker images on docker hub</caption>
+            <thead class="thead-dark">
+                <tr>
+                <th scope="col">repo</th>
+                <th scope="col">tag</th>
+                <th scope="col">size</th>
+                <th scope="col">last_updated_at</th>
+                <th scope="col">last_updated_by</th>
+                </tr>
+            </thead>
+            <tbody>
+                {table_content}
+            </tbody>
+            </table>
+        </body>
+        <script>
+            $(document).ready( function () {{
+                $('#docker').DataTable({{paging: false}});
+            }} );
+        </script>
+    </html>
+    """.format(
+        table_content=table_content
+    )
+    client.put_object(
+        Bucket="docker.pytorch.org",
+        ACL="public-read",
+        Key="docker_hub.html",
+        Body=html_body,
+        ContentType="text/html",
+    )
+
+
+if __name__ == "__main__":
+    # Credentials come from the environment; the resulting header dict is
+    # reused for every Docker Hub request below.
+    username = os.environ.get("DOCKER_HUB_USERNAME")
+    password = os.environ.get("DOCKER_HUB_PASSWORD")
+    token = build_access_token(username, password)
+    # Collect the tags of every "pytorch" repository, then publish the report.
+    tags = [
+        info
+        for repo in list_repos("pytorch", token)
+        for info in list_tags(repo, token)
+    ]
+    save_to_s3(tags)
--- a/.circleci/ecr_gc_docker/gc.py
+++ b/.circleci/ecr_gc_docker/gc.py
@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+import argparse
+import boto3
+import datetime
+import pytz
+import re
+import sys
+
+
+def save_to_s3(project, data):
+    """Render the kept-image report for *project* as HTML and upload it to S3.
+
+    Args:
+        project: name used in the page title and as the S3 key
+            (``"<project>.html"``).
+        data: iterable of ``(repo, tag, window, age, pushed)`` tuples, one per
+            table row.
+    """
+    client = boto3.client("s3")
+    row_template = "<tr><td>{repo}</td><td>{tag}</td><td>{window}</td><td>{age}</td><td>{pushed}</td></tr>"
+    table_content = "".join(
+        row_template.format(repo=repo, tag=tag, window=window, age=age, pushed=pushed)
+        for repo, tag, window, age, pushed in data
+    )
+    # Doubled braces in the <script> section are str.format escapes.
+    page_template = """
+    <html>
+        <head>
+            <link rel="stylesheet"
+                href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css"
+                integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh"
+                crossorigin="anonymous">
+            <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.20/css/jquery.dataTables.css">
+            <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
+            <script type="text/javascript" charset="utf8" src="https://cdn.datatables.net/1.10.20/js/jquery.dataTables.js"></script>
+            <title>{project} nightly and permanent docker image info</title>
+        </head>
+        <body>
+            <table class="table table-striped table-hover" id="docker">
+            <thead class="thead-dark">
+                <tr>
+                <th scope="col">repo</th>
+                <th scope="col">tag</th>
+                <th scope="col">keep window</th>
+                <th scope="col">age</th>
+                <th scope="col">pushed at</th>
+                </tr>
+            </thead>
+            <tbody>
+                {table_content}
+            </tbody>
+            </table>
+        </body>
+        <script>
+            $(document).ready( function () {{
+                $('#docker').DataTable({{paging: false}});
+            }} );
+        </script>
+    </html>
+    """
+    html_body = page_template.format(project=project, table_content=table_content)
+
+    # For pytorch, the file can be found at
+    # http://ossci-docker.s3-website.us-east-1.amazonaws.com/pytorch.html
+    # and later on we can configure docker.pytorch.org to point to that location.
+
+    client.put_object(
+        Bucket="docker.pytorch.org",
+        ACL="public-read",
+        Key="{project}.html".format(project=project),
+        Body=html_body,
+        ContentType="text/html",
+    )
+
+
+def repos(client):
+    """Yield every repository record of registry 308535385114, page by page."""
+    repo_pages = client.get_paginator("describe_repositories").paginate(
+        registryId="308535385114"
+    )
+    for page in repo_pages:
+        yield from page["repositories"]
+
+
+def images(client, repository):
+    """Yield every image detail record of *repository*, page by page.
+
+    *repository* is a record carrying a ``"repositoryName"`` key, as yielded
+    by ``repos()``.
+    """
+    image_pages = client.get_paginator("describe_images").paginate(
+        registryId="308535385114", repositoryName=repository["repositoryName"]
+    )
+    for page in image_pages:
+        yield from page["imageDetails"]
+
+
+# Command-line interface.  Parsing happens at import time, so this module is
+# meant to be run as a script.
+parser = argparse.ArgumentParser(description="Delete old Docker tags from registry")
+parser.add_argument(
+    "--dry-run", action="store_true", help="Dry run; print tags that would be deleted"
+)
+parser.add_argument(
+    "--debug", action="store_true", help="Debug, print ignored / saved tags"
+)
+parser.add_argument(
+    "--keep-stable-days",
+    type=int,
+    default=14,
+    help="Days of stable Docker tags to keep (non per-build images)",
+)
+parser.add_argument(
+    "--keep-unstable-days",
+    type=int,
+    default=1,
+    help="Days of unstable Docker tags to keep (per-build images)",
+)
+parser.add_argument(
+    "--filter-prefix",
+    type=str,
+    default="",
+    help="Only run cleanup for repositories with this prefix",
+)
+parser.add_argument(
+    "--ignore-tags",
+    type=str,
+    default="",
+    help="Never cleanup these tags (comma separated)",
+)
+args = parser.parse_args()
+
+# Both options default to "" but are effectively mandatory: refuse to run
+# (exit status 1) unless each was given a non-empty value.
+if not args.ignore_tags or not args.filter_prefix:
+    print(
+        """
+Missing required arguments --ignore-tags and --filter-prefix
+
+You must specify --ignore-tags and --filter-prefix to avoid accidentally
+pruning a stable Docker tag which is being actively used.  This will
+make you VERY SAD.  So pay attention.
+
+First, which filter-prefix do you want?  The list of valid prefixes
+is in jobs/private.groovy under the 'docker-registry-cleanup' job.
+You probably want either pytorch or caffe2.
+
+Second, which ignore-tags do you want?  It should be whatever the most
+up-to-date DockerVersion for the repository in question is.  Follow
+the imports of jobs/pytorch.groovy to find them.
+"""
+    )
+    sys.exit(1)
+
+# Shared state used by the cleanup loop below.
+client = boto3.client("ecr", region_name="us-east-1")
+# Keep windows expressed as timedeltas so they compare directly with image age.
+stable_window = datetime.timedelta(days=args.keep_stable_days)
+unstable_window = datetime.timedelta(days=args.keep_unstable_days)
+# Timezone-aware "now" (UTC); image ages are computed as now - imagePushedAt.
+now = datetime.datetime.now(pytz.UTC)
+# Comma-separated option turned into a list of exact tag names.
+ignore_tags = args.ignore_tags.split(",")
+
+
+def chunks(chunkable, n):
+    """Yield successive slices of *chunkable*, each holding at most *n* items."""
+    offsets = range(0, len(chunkable), n)
+    for start in offsets:
+        stop = start + n
+        yield chunkable[start:stop]
+
+
+# A full git SHA-1: exactly 40 lowercase hexadecimal characters.
+SHA_PATTERN = re.compile(r'^[0-9a-f]{40}$')
+
+
+def looks_like_git_sha(tag):
+    """Return True when *tag* has the shape of a full git SHA-1.
+
+    That is, 40 characters drawn only from 0-9a-f (so no "-" characters).
+    """
+    matched = SHA_PATTERN.match(tag)
+    return matched is not None
+
+
+# Main cleanup pass: for every repository whose name starts with
+# --filter-prefix, collect the digests of images none of whose tags is
+# protected, then batch-delete them unless --dry-run was given.
+# stable_window_tags accumulates (repo, tag, window, age, pushed) rows for the
+# HTML report.
+stable_window_tags = []
+for repo in repos(client):
+    repositoryName = repo["repositoryName"]
+    if not repositoryName.startswith(args.filter_prefix):
+        continue
+
+    # Keep list of image digests to delete for this repository
+    digest_to_delete = []
+
+    for image in images(client, repo):
+        tags = image.get("imageTags")
+        # Images without a non-empty tag list are skipped and never deleted here.
+        if not isinstance(tags, (list,)) or len(tags) == 0:
+            continue
+        created = image["imagePushedAt"]
+        age = now - created
+        for tag in tags:
+            # A tag gets the stable window when it looks like a git SHA, is all
+            # digits, contains exactly four dashes, or is explicitly ignored;
+            # every other tag gets the unstable window.
+            if any([
+                    looks_like_git_sha(tag),
+                    tag.isdigit(),
+                    tag.count("-") == 4,  # TODO: Remove, this no longer applies as tags are now built using a SHA1
+                    tag in ignore_tags]):
+                window = stable_window
+                # Record kept stable tags for the report; ignored tags are
+                # reported with an empty window column.
+                if tag in ignore_tags:
+                    stable_window_tags.append((repositoryName, tag, "", age, created))
+                elif age < window:
+                    stable_window_tags.append((repositoryName, tag, window, age, created))
+            else:
+                window = unstable_window
+
+            # A single ignored or young-enough tag protects the whole image:
+            # the break skips the for/else deletion branch below.
+            if tag in ignore_tags or age < window:
+                if args.debug:
+                    print("Ignoring {}:{} (age: {})".format(repositoryName, tag, age))
+                break
+        else:
+            # Reached only when no tag triggered the break above.
+            for tag in tags:
+                print("{}Deleting {}:{} (age: {})".format("(dry run) " if args.dry_run else "", repositoryName, tag, age))
+            digest_to_delete.append(image["imageDigest"])
+    if args.dry_run:
+        if args.debug:
+            print("Skipping actual deletion, moving on...")
+    else:
+        # Issue batch delete for all images to delete for this repository
+        # Note that as of 2018-07-25, the maximum number of images you can
+        # delete in a single batch is 100, so chunk our list into batches of
+        # 100
+        for c in chunks(digest_to_delete, 100):
+            client.batch_delete_image(
+                registryId="308535385114",
+                repositoryName=repositoryName,
+                imageIds=[{"imageDigest": digest} for digest in c],
+            )
+
+        # Upload the report built so far; this runs once per matching
+        # repository and only when not in dry-run mode.
+        save_to_s3(args.filter_prefix, stable_window_tags)
--- a/.circleci/ecr_gc_docker/requirements.txt
+++ b/.circleci/ecr_gc_docker/requirements.txt
@ -0,0 +1,3 @@
+boto3
+pytz
+requests
--- a/.circleci/generate_config_yml.py
+++ b/.circleci/generate_config_yml.py
@ -11,13 +11,17 @@ import sys
 from collections import namedtuple

 import cimodel.data.binary_build_definitions as binary_build_definitions
+import cimodel.data.pytorch_build_definitions as pytorch_build_definitions
 import cimodel.data.simple.android_definitions
 import cimodel.data.simple.binary_smoketest
 import cimodel.data.simple.docker_definitions
+import cimodel.data.simple.ios_definitions
+import cimodel.data.simple.macos_definitions
 import cimodel.data.simple.mobile_definitions
 import cimodel.data.simple.nightly_android
 import cimodel.data.simple.nightly_ios
 import cimodel.data.simple.anaconda_prune_defintions
+import cimodel.data.windows_build_definitions as windows_build_definitions
 import cimodel.lib.miniutils as miniutils
 import cimodel.lib.miniyaml as miniyaml

@ -74,15 +78,15 @@ class Header(object):
        for line in filter(None, lines):
            output_filehandle.write(line + "\n")

-def _for_all_items(items, functor) -> None:
-    if isinstance(items, list):
-        for item in items:
-            _for_all_items(item, functor)
-    if isinstance(items, dict) and len(items) == 1:
-        item_type, item = next(iter(items.items()))
-        functor(item_type, item)
-
 def filter_master_only_jobs(items):
+    def _for_all_items(items, functor) -> None:
+        if isinstance(items, list):
+            for item in items:
+                _for_all_items(item, functor)
+        if isinstance(items, dict) and len(items) == 1:
+            item_type, item = next(iter(items.items()))
+            functor(item_type, item)
+
    def _is_master_item(item):
        filters = item.get('filters', None)
        branches = filters.get('branches', None) if filters is not None else None
@ -120,39 +124,24 @@ def filter_master_only_jobs(items):
    _for_all_items(items, _save_requires_if_master)
    return _do_filtering(items)

-def generate_required_docker_images(items):
-    required_docker_images = set()
-
-    def _requires_docker_image(item_type, item):
-        requires = item.get('requires', None)
-        if not isinstance(requires, list):
-            return
-        for requirement in requires:
-            requirement = requirement.replace('"', '')
-            if requirement.startswith('docker-'):
-                required_docker_images.add(requirement)
-
-    _for_all_items(items, _requires_docker_image)
-    return required_docker_images

 def gen_build_workflows_tree():
    build_workflows_functions = [
+        cimodel.data.simple.docker_definitions.get_workflow_jobs,
+        pytorch_build_definitions.get_workflow_jobs,
+        cimodel.data.simple.macos_definitions.get_workflow_jobs,
        cimodel.data.simple.android_definitions.get_workflow_jobs,
+        cimodel.data.simple.ios_definitions.get_workflow_jobs,
        cimodel.data.simple.mobile_definitions.get_workflow_jobs,
        cimodel.data.simple.binary_smoketest.get_workflow_jobs,
        cimodel.data.simple.nightly_ios.get_workflow_jobs,
        cimodel.data.simple.nightly_android.get_workflow_jobs,
        cimodel.data.simple.anaconda_prune_defintions.get_workflow_jobs,
+        windows_build_definitions.get_windows_workflows,
        binary_build_definitions.get_post_upload_jobs,
        binary_build_definitions.get_binary_smoke_test_jobs,
    ]
    build_jobs = [f() for f in build_workflows_functions]
-    build_jobs.extend(
-        cimodel.data.simple.docker_definitions.get_workflow_jobs(
-            # sort for consistency
-            sorted(generate_required_docker_images(build_jobs))
-        )
-    )
    master_build_jobs = filter_master_only_jobs(build_jobs)

    binary_build_functions = [
@ -161,6 +150,11 @@ def gen_build_workflows_tree():
        binary_build_definitions.get_nightly_uploads,
    ]

+    slow_gradcheck_jobs = [
+        pytorch_build_definitions.get_workflow_jobs,
+        cimodel.data.simple.docker_definitions.get_workflow_jobs,
+    ]
+
    return {
        "workflows": {
            "binary_builds": {
@ -175,6 +169,10 @@ def gen_build_workflows_tree():
                "when": r"<< pipeline.parameters.run_master_build >>",
                "jobs": master_build_jobs,
            },
+            "slow_gradcheck_build": {
+                "when": r"<< pipeline.parameters.run_slow_gradcheck_build >>",
+                "jobs": [f(only_slow_gradcheck=True) for f in slow_gradcheck_jobs],
+            },
        }
    }

@ -198,6 +196,8 @@ YAML_SOURCES = [
    File("job-specs/docker_jobs.yml"),
    Header("Workflows"),
    Treegen(gen_build_workflows_tree, 0),
+    File("workflows/workflows-scheduled-ci.yml"),
+    File("workflows/workflows-ecr-gc.yml"),
    File("workflows/workflows-promote.yml"),
 ]

--- a/.circleci/scripts/binary_checkout.sh
+++ b/.circleci/scripts/binary_checkout.sh
@ -61,7 +61,7 @@ git --no-pager log --max-count 1
 popd

 # Clone the Builder repo at the pinned release branch
-retry git clone -q https://github.com/pytorch/builder.git -b release/1.11 "$BUILDER_ROOT"
+retry git clone -q https://github.com/pytorch/builder.git -b release/1.10 "$BUILDER_ROOT"
 pushd "$BUILDER_ROOT"
 echo "Using builder from "
 git --no-pager log --max-count 1
--- a/.circleci/scripts/binary_ios_test.sh
+++ b/.circleci/scripts/binary_ios_test.sh
@ -27,4 +27,4 @@ if ! [ -x "$(command -v xcodebuild)" ]; then
    exit 1
 fi
 PROFILE=PyTorch_CI_2022
-ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}
+ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID} -f Accelerate,MetalPerformanceShaders,CoreML
--- a/.circleci/scripts/binary_ios_upload.sh
+++ b/.circleci/scripts/binary_ios_upload.sh
@ -23,23 +23,14 @@ do
    fi
 done
 lipo -i ${ZIP_DIR}/install/lib/*.a
-echo "BUILD_LITE_INTERPRETER: ${BUILD_LITE_INTERPRETER}"
 # copy the umbrella header and license
-if [ "${BUILD_LITE_INTERPRETER}" == "1" ]; then
-    cp ${PROJ_ROOT}/ios/LibTorch-Lite.h ${ZIP_DIR}/src/
-else
-    cp ${PROJ_ROOT}/ios/LibTorch.h ${ZIP_DIR}/src/
-fi
+cp ${PROJ_ROOT}/ios/LibTorch-Lite.h ${ZIP_DIR}/src/
 cp ${PROJ_ROOT}/LICENSE ${ZIP_DIR}/
 # zip the library
 export DATE="$(date -u +%Y%m%d)"
-export IOS_NIGHTLY_BUILD_VERSION="1.11.0.${DATE}"
-if [ "${BUILD_LITE_INTERPRETER}" == "1" ]; then
-    # libtorch_lite_ios_nightly_1.11.0.20210810.zip
-    ZIPFILE="libtorch_lite_ios_nightly_${IOS_NIGHTLY_BUILD_VERSION}.zip"
-else
-    ZIPFILE="libtorch_ios_nightly_build.zip"
-fi
+export IOS_NIGHTLY_BUILD_VERSION="1.10.0.${DATE}"
+# libtorch_lite_ios_nightly_1.10.0.20210810.zip
+ZIPFILE="libtorch_lite_ios_nightly_${IOS_NIGHTLY_BUILD_VERSION}.zip"
 cd ${ZIP_DIR}
 #for testing
 touch version.txt
@ -61,15 +52,13 @@ set +x
 # echo "AWS SECRET: ${AWS_SECRET_ACCESS_KEY}"
 aws s3 cp ${ZIPFILE} s3://ossci-ios-build/ --acl public-read

-if [ "${BUILD_LITE_INTERPRETER}" == "1" ]; then
-    # create a new LibTorch-Lite-Nightly.podspec from the template
-    echo "cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec"
-    cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
+# create a new LibTorch-Lite-Nightly.podspec from the template
+echo "cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec"
+cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec

-    # update pod version
-    sed -i '' -e "s/IOS_NIGHTLY_BUILD_VERSION/${IOS_NIGHTLY_BUILD_VERSION}/g" ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
-    cat ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
+# update pod version
+sed -i '' -e "s/IOS_NIGHTLY_BUILD_VERSION/${IOS_NIGHTLY_BUILD_VERSION}/g" ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
+cat ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec

-    # push the new LibTorch-Lite-Nightly.podspec to CocoaPods
-    pod trunk push --verbose --allow-warnings --use-libraries --skip-import-validation ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
-fi
+# push the new LibTorch-Lite-Nightly.podspec to CocoaPods
+pod trunk push --verbose --allow-warnings --use-libraries --skip-import-validation ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
--- a/.circleci/scripts/binary_linux_build.sh
+++ b/.circleci/scripts/binary_linux_build.sh
@ -11,7 +11,7 @@ NUM_CPUS=$(( $(nproc) - 2 ))
 # Defaults here for **binary** linux builds so they can be changed in one place
 export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}

-if [[ "${DESIRED_CUDA}" =~ cu11[0-9] ]]; then
+if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
  export BUILD_SPLIT_CUDA="ON"
 fi

--- a/.circleci/scripts/binary_linux_test.sh
+++ b/.circleci/scripts/binary_linux_test.sh
@ -1,24 +1,10 @@
 #!/bin/bash

-OUTPUT_SCRIPT=${OUTPUT_SCRIPT:-/home/circleci/project/ci_test_script.sh}
-
-# only source if file exists
-if [[ -f /home/circleci/project/env ]]; then
-  source /home/circleci/project/env
-fi
-cat >"${OUTPUT_SCRIPT}" <<EOL
+source /home/circleci/project/env
+cat >/home/circleci/project/ci_test_script.sh <<EOL
 # =================== The following code will be executed inside Docker container ===================
 set -eux -o pipefail

-retry () {
-    "\$@"  || (sleep 1 && "\$@") || (sleep 2 && "\$@")
-}
-
-# Source binary env file here if exists
-if [[ -e "${BINARY_ENV_FILE:-/nofile}" ]]; then
-  source "${BINARY_ENV_FILE:-/nofile}"
-fi
-
 python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)"

 # Set up Python
@ -37,23 +23,14 @@ fi

 EXTRA_CONDA_FLAGS=""
 NUMPY_PIN=""
-PROTOBUF_PACKAGE="defaults::protobuf"
-if [[ "\$python_nodot" = *310* ]]; then
-  EXTRA_CONDA_FLAGS="-c=conda-forge"
-  # There's an issue with conda channel priority where it'll randomly pick 1.19 over 1.20
-  # we set a lower boundary here just to be safe
-  NUMPY_PIN=">=1.21.2"
-  PROTOBUF_PACKAGE="protobuf>=3.19.0"
-fi
-
-if [[ "\$python_nodot" = *39*  ]]; then
+if [[ "\$python_nodot" = *39* ]]; then
  EXTRA_CONDA_FLAGS="-c=conda-forge"
  # There's an issue with conda channel priority where it'll randomly pick 1.19 over 1.20
  # we set a lower boundary here just to be safe
  NUMPY_PIN=">=1.20"
 fi

-if [[ "$DESIRED_CUDA" == "cu112" || "$DESIRED_CUDA" == "cu115" ]]; then
+if [[ "$DESIRED_CUDA" == "cu112" ]]; then
  EXTRA_CONDA_FLAGS="-c=conda-forge"
 fi

@ -82,7 +59,7 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
      ninja \
      dataclasses \
      typing-extensions \
-      ${PROTOBUF_PACKAGE} \
+      defaults::protobuf \
      six
    if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
      retry conda install -c pytorch -y cpuonly
@ -115,4 +92,4 @@ EOL
 echo
 echo
 echo "The script that will run in the next step is:"
-cat "${OUTPUT_SCRIPT}"
+cat /home/circleci/project/ci_test_script.sh
--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@ -5,70 +5,53 @@ export TZ=UTC
 tagged_version() {
  # Grabs version from either the env variable CIRCLE_TAG
  # or the pytorch git described version
-  if [[ "$OSTYPE" == "msys" &&  -z "${IS_GHA:-}" ]]; then
-    GIT_DIR="${workdir}/p/.git"
+  if [[ "$OSTYPE" == "msys" ]]; then
+    GIT_DESCRIBE="git --git-dir ${workdir}/p/.git describe"
  else
-    GIT_DIR="${workdir}/pytorch/.git"
+    GIT_DESCRIBE="git --git-dir ${workdir}/pytorch/.git describe"
  fi
-  GIT_DESCRIBE="git --git-dir ${GIT_DIR} describe --tags --match v[0-9]*.[0-9]*.[0-9]*"
  if [[ -n "${CIRCLE_TAG:-}" ]]; then
    echo "${CIRCLE_TAG}"
-  elif [[ ! -d "${GIT_DIR}" ]]; then
-    echo "Abort, abort! Git dir ${GIT_DIR} does not exists!"
-    kill $$
-  elif ${GIT_DESCRIBE} --exact >/dev/null; then
-    ${GIT_DESCRIBE}
+  elif ${GIT_DESCRIBE} --exact --tags >/dev/null; then
+    ${GIT_DESCRIBE} --tags
  else
    return 1
  fi
 }

-# These are only relevant for CircleCI
-# TODO: Remove these later once migrated fully to GHA
-if [[ -z ${IS_GHA:-} ]]; then
-  # We need to write an envfile to persist these variables to following
-  # steps, but the location of the envfile depends on the circleci executor
-  if [[ "$(uname)" == Darwin ]]; then
-    # macos executor (builds and tests)
-    workdir="/Users/distiller/project"
-  elif [[ "$OSTYPE" == "msys" ]]; then
-    # windows executor (builds and tests)
-    workdir="/c/w"
-  elif [[ -d "/home/circleci/project" ]]; then
-    # machine executor (binary tests)
-    workdir="/home/circleci/project"
-  else
-    # docker executor (binary builds)
-    workdir="/"
-  fi
-  envfile="$workdir/env"
-  touch "$envfile"
-  chmod +x "$envfile"
+# We need to write an envfile to persist these variables to following
+# steps, but the location of the envfile depends on the circleci executor
+if [[ "$(uname)" == Darwin ]]; then
+  # macos executor (builds and tests)
+  workdir="/Users/distiller/project"
+elif [[ "$OSTYPE" == "msys" ]]; then
+  # windows executor (builds and tests)
+  workdir="/c/w"
+elif [[ -d "/home/circleci/project" ]]; then
+  # machine executor (binary tests)
+  workdir="/home/circleci/project"
+else
+  # docker executor (binary builds)
+  workdir="/"
+fi
+envfile="$workdir/env"
+touch "$envfile"
+chmod +x "$envfile"

-  # Parse the BUILD_ENVIRONMENT to package type, python, and cuda
-  configs=($BUILD_ENVIRONMENT)
-  export PACKAGE_TYPE="${configs[0]}"
-  export DESIRED_PYTHON="${configs[1]}"
-  export DESIRED_CUDA="${configs[2]}"
-  if [[ "${OSTYPE}" == "msys" ]]; then
-    export DESIRED_DEVTOOLSET=""
-    export LIBTORCH_CONFIG="${configs[3]:-}"
-    if [[ "$LIBTORCH_CONFIG" == 'debug' ]]; then
-      export DEBUG=1
-    fi
-  else
-    export DESIRED_DEVTOOLSET="${configs[3]:-}"
+# Parse the BUILD_ENVIRONMENT to package type, python, and cuda
+configs=($BUILD_ENVIRONMENT)
+export PACKAGE_TYPE="${configs[0]}"
+export DESIRED_PYTHON="${configs[1]}"
+export DESIRED_CUDA="${configs[2]}"
+if [[ "${BUILD_FOR_SYSTEM:-}" == "windows" ]]; then
+  export DESIRED_DEVTOOLSET=""
+  export LIBTORCH_CONFIG="${configs[3]:-}"
+  if [[ "$LIBTORCH_CONFIG" == 'debug' ]]; then
+    export DEBUG=1
  fi
 else
-  envfile=${BINARY_ENV_FILE:-/tmp/env}
-  if [[ -n "${PYTORCH_ROOT}"  ]]; then
-    workdir=$(dirname "${PYTORCH_ROOT}")
-  else
-    # docker executor (binary builds)
-    workdir="/"
-  fi
+  export DESIRED_DEVTOOLSET="${configs[3]:-}"
 fi
-
 if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
  export BUILD_PYTHONLESS=1
 fi
@ -102,7 +85,7 @@ PIP_UPLOAD_FOLDER='nightly/'
 # We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
 export DATE="$(date -u +%Y%m%d)"
 #TODO: We should be pulling semver version from the base version.txt
-BASE_BUILD_VERSION="1.11.0.dev$DATE"
+BASE_BUILD_VERSION="1.10.0.dev$DATE"
 # Change BASE_BUILD_VERSION to git tag when on a git tag
 # Use 'git -C' to make doubly sure we're in the correct directory for checking
 # the git tag
@ -148,28 +131,24 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then
  fi
 fi

-cat >"$envfile" <<EOL
+cat >>"$envfile" <<EOL
 # =================== The following code will be executed inside Docker container ===================
 export TZ=UTC
 echo "Running on $(uname -a) at $(date)"

 export PACKAGE_TYPE="$PACKAGE_TYPE"
-export DESIRED_PYTHON="${DESIRED_PYTHON:-}"
+export DESIRED_PYTHON="$DESIRED_PYTHON"
 export DESIRED_CUDA="$DESIRED_CUDA"
 export LIBTORCH_VARIANT="${LIBTORCH_VARIANT:-}"
 export BUILD_PYTHONLESS="${BUILD_PYTHONLESS:-}"
-if [[ "${OSTYPE}" == "msys" ]]; then
+export DESIRED_DEVTOOLSET="$DESIRED_DEVTOOLSET"
+if [[ "${BUILD_FOR_SYSTEM:-}" == "windows" ]]; then
  export LIBTORCH_CONFIG="${LIBTORCH_CONFIG:-}"
-  if [[ "${LIBTORCH_CONFIG:-}" == 'debug' ]]; then
-    export DEBUG=1
-  fi
-  export DESIRED_DEVTOOLSET=""
-else
-  export DESIRED_DEVTOOLSET="${DESIRED_DEVTOOLSET:-}"
+  export DEBUG="${DEBUG:-}"
 fi

 export DATE="$DATE"
-export NIGHTLIES_DATE_PREAMBLE=1.11.0.dev
+export NIGHTLIES_DATE_PREAMBLE=1.10.0.dev
 export PYTORCH_BUILD_VERSION="$PYTORCH_BUILD_VERSION"
 export PYTORCH_BUILD_NUMBER="$PYTORCH_BUILD_NUMBER"
 export OVERRIDE_PACKAGE_VERSION="$PYTORCH_BUILD_VERSION"
@ -177,7 +156,6 @@ export OVERRIDE_PACKAGE_VERSION="$PYTORCH_BUILD_VERSION"
 # TODO: We don't need this anymore IIUC
 export TORCH_PACKAGE_NAME='torch'
 export TORCH_CONDA_BUILD_FOLDER='pytorch-nightly'
-export ANACONDA_USER='pytorch'

 export USE_FBGEMM=1
 export JAVA_HOME=$JAVA_HOME
@ -185,6 +163,23 @@ export BUILD_JNI=$BUILD_JNI
 export PIP_UPLOAD_FOLDER="$PIP_UPLOAD_FOLDER"
 export DOCKER_IMAGE="$DOCKER_IMAGE"

+export workdir="$workdir"
+export MAC_PACKAGE_WORK_DIR="$workdir"
+if [[ "$OSTYPE" == "msys" ]]; then
+  export PYTORCH_ROOT="$workdir/p"
+  export BUILDER_ROOT="$workdir/b"
+else
+  export PYTORCH_ROOT="$workdir/pytorch"
+  export BUILDER_ROOT="$workdir/builder"
+fi
+export MINICONDA_ROOT="$workdir/miniconda"
+export PYTORCH_FINAL_PACKAGE_DIR="$workdir/final_pkgs"
+
+export CIRCLE_TAG="${CIRCLE_TAG:-}"
+export CIRCLE_SHA1="$CIRCLE_SHA1"
+export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
+export CIRCLE_BRANCH="$CIRCLE_BRANCH"
+export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"

 export USE_GOLD_LINKER="${USE_GOLD_LINKER}"
 export USE_GLOO_WITH_OPENSSL="ON"
@ -192,42 +187,6 @@ export USE_WHOLE_CUDNN="${USE_WHOLE_CUDNN}"
 # =================== The above code will be executed inside Docker container ===================
 EOL

-# nproc doesn't exist on darwin
-if [[ "$(uname)" != Darwin ]]; then
-  # Because most Circle executors only have 20 CPUs, using more causes OOMs w/ Ninja and nvcc parallelization
-  MEMORY_LIMIT_MAX_JOBS=18
-  NUM_CPUS=$(( $(nproc) - 2 ))
-
-  # Defaults here for **binary** linux builds so they can be changed in one place
-  export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}
-
-  cat >>"$envfile" <<EOL
-  export MAX_JOBS="${MAX_JOBS}"
-EOL
-fi
-
-if [[ -z "${IS_GHA:-}" ]]; then
-  cat >>"$envfile" <<EOL
-  export workdir="$workdir"
-  export MAC_PACKAGE_WORK_DIR="$workdir"
-  if [[ "$OSTYPE" == "msys" ]]; then
-    export PYTORCH_ROOT="$workdir/p"
-    export BUILDER_ROOT="$workdir/b"
-  else
-    export PYTORCH_ROOT="$workdir/pytorch"
-    export BUILDER_ROOT="$workdir/builder"
-  fi
-  export MINICONDA_ROOT="$workdir/miniconda"
-  export PYTORCH_FINAL_PACKAGE_DIR="$workdir/final_pkgs"
-
-  export CIRCLE_TAG="${CIRCLE_TAG:-}"
-  export CIRCLE_SHA1="$CIRCLE_SHA1"
-  export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
-  export CIRCLE_BRANCH="$CIRCLE_BRANCH"
-  export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
-EOL
-fi
-
 echo 'retry () {' >> "$envfile"
 echo '    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)' >> "$envfile"
 echo '}' >> "$envfile"
--- a/.circleci/scripts/binary_upload.sh
+++ b/.circleci/scripts/binary_upload.sh
@ -63,10 +63,6 @@ s3_upload() {
  )
 }

-# Install dependencies (should be a no-op if previously installed)
-conda install -yq anaconda-client
-pip install -q awscli
-
 case "${PACKAGE_TYPE}" in
  conda)
    conda_upload
--- a/.circleci/scripts/binary_windows_build.sh
+++ b/.circleci/scripts/binary_windows_build.sh
@ -1,7 +1,7 @@
 #!/bin/bash
 set -eux -o pipefail

-source "${BINARY_ENV_FILE:-/c/w/env}"
+source "/c/w/env"
 mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"

 export CUDA_VERSION="${DESIRED_CUDA/cu/}"
@ -10,12 +10,12 @@ export SCCACHE_BUCKET=ossci-compiler-cache-windows
 export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
 export VC_YEAR=2019

-if [[ "${DESIRED_CUDA}" == *"cu11"* ]]; then
-    export BUILD_SPLIT_CUDA=ON
+if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
+    export BUILD_SPLIT_CUDA="ON"
 fi

 echo "Free Space for CUDA DEBUG BUILD"
-if [[ "${CIRCLECI:-}" == 'true' ]]; then
+if [[ "$CIRCLECI" == 'true' ]]; then
    if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community" ]]; then
        rm -rf "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community"
    fi
@ -47,20 +47,23 @@ if [[ "${CIRCLECI:-}" == 'true' ]]; then
    if [[ -d "C:\\Program Files (x86)\\Google" ]]; then
        rm -rf "C:\\Program Files (x86)\\Google"
    fi
-    set +x
-    export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}
-    export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}
-    set -x
-    if [[ -d "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages\\_Instances" ]]; then
-        mv "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages\\_Instances" .
-        rm -rf "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages"
-        mkdir -p "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages"
-        mv _Instances "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages"
-    fi
-    if [[ -d "C:\\Microsoft" ]]; then
-        # don't use quotes here
-        rm -rf /c/Microsoft/AndroidNDK*
-    fi
+fi
+
+set +x
+export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}
+export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}
+set -x
+
+if [[ "$CIRCLECI" == 'true' && -d "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages\\_Instances" ]]; then
+  mv "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages\\_Instances" .
+  rm -rf "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages"
+  mkdir -p "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages"
+  mv _Instances "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages"
+fi
+
+if [[ "$CIRCLECI" == 'true' && -d "C:\\Microsoft" ]]; then
+  # don't use quotes here: the AndroidNDK* glob must stay unquoted so the shell expands it
+  rm -rf /c/Microsoft/AndroidNDK*
 fi

 echo "Free space on filesystem before build:"
@ -68,9 +71,9 @@ df -h

 pushd "$BUILDER_ROOT"
 if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
-    ./windows/internal/build_conda.bat
+  ./windows/internal/build_conda.bat
 elif [[ "$PACKAGE_TYPE" == 'wheel' || "$PACKAGE_TYPE" == 'libtorch' ]]; then
-    ./windows/internal/build_wheels.bat
+  ./windows/internal/build_wheels.bat
 fi

 echo "Free space on filesystem after build:"
--- a/.circleci/scripts/binary_windows_test.sh
+++ b/.circleci/scripts/binary_windows_test.sh
@ -1,7 +1,7 @@
 #!/bin/bash
 set -eux -o pipefail

-source "${BINARY_ENV_FILE:-/c/w/env}"
+source "/c/w/env"

 export CUDA_VERSION="${DESIRED_CUDA/cu/}"
 export VC_YEAR=2019
--- a/.circleci/scripts/cpp_doc_push_script.sh
+++ b/.circleci/scripts/cpp_doc_push_script.sh
@ -65,6 +65,7 @@ cp torch/_utils_internal.py tools/shared

 # Generate PyTorch files
 time python tools/setup_helpers/generate_code.py \
+  --declarations-path build/aten/src/ATen/Declarations.yaml \
  --native-functions-path aten/src/ATen/native/native_functions.yaml \
  --nn-path aten/src/

@ -96,12 +97,8 @@ git status
 git config user.email "soumith+bot@pytorch.org"
 git config user.name "pytorchbot"
 # If there aren't changes, don't make a commit; push is no-op
-git commit -m "Generate C++ docs from pytorch/pytorch@${GITHUB_SHA}" || true
+git commit -m "Generate C++ docs from pytorch/pytorch@$CIRCLE_SHA1" || true
 git status

-if [[ "${WITH_PUSH:-}" == true ]]; then
-  git push -u origin
-fi
-
 popd
 # =================== The above code **should** be executed inside Docker container ===================
--- a/.circleci/scripts/python_doc_push_script.sh
+++ b/.circleci/scripts/python_doc_push_script.sh
@ -131,12 +131,8 @@ git status
 git config user.email "soumith+bot@pytorch.org"
 git config user.name "pytorchbot"
 # If there aren't changes, don't make a commit; push is no-op
-git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true
+git commit -m "Generate Python docs from pytorch/pytorch@$CIRCLE_SHA1" || true
 git status

-if [[ "${WITH_PUSH:-}" == true ]]; then
-  git push -u origin "${branch}"
-fi
-
 popd
 # =================== The above code **should** be executed inside Docker container ===================
--- a/.circleci/scripts/setup_ci_environment.sh
+++ b/.circleci/scripts/setup_ci_environment.sh
@ -32,7 +32,7 @@ if ! command -v aws >/dev/null; then
 fi

 if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then
-  DRIVER_FN="NVIDIA-Linux-x86_64-495.44.run"
+  DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
  wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
  sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
  nvidia-smi
--- a/.circleci/scripts/windows_cuda_install.sh
+++ b/.circleci/scripts/windows_cuda_install.sh
@ -11,17 +11,13 @@ case ${CUDA_VERSION} in
        cuda_install_packages="nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2"
        ;;
    11.1)
-        cuda_installer_name="cuda_11.1.1_456.81_win10"
+        cuda_installer_name="cuda_11.1.0_456.43_win10"
        cuda_install_packages="nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
        ;;
    11.3)
        cuda_installer_name="cuda_11.3.0_465.89_win10"
        cuda_install_packages="thrust_11.3 nvcc_11.3 cuobjdump_11.3 nvprune_11.3 nvprof_11.3 cupti_11.3 cublas_11.3 cublas_dev_11.3 cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3"
        ;;
-    11.5)
-        cuda_installer_name="cuda_11.5.0_496.13_win10"
-        cuda_install_packages="thrust_11.5 nvcc_11.5 cuobjdump_11.5 nvprune_11.5 nvprof_11.5 cupti_11.5 cublas_11.5 cublas_dev_11.5 cudart_11.5 cufft_11.5 cufft_dev_11.5 curand_11.5 curand_dev_11.5 cusolver_11.5 cusolver_dev_11.5 cusparse_11.5 cusparse_dev_11.5 npp_11.5 npp_dev_11.5 nvrtc_11.5 nvrtc_dev_11.5 nvml_dev_11.5"
-        ;;
    *)
        echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
        exit 1
--- a/.circleci/scripts/windows_cudnn_install.sh
+++ b/.circleci/scripts/windows_cudnn_install.sh
@ -1,26 +1,23 @@
 #!/bin/bash
 set -eux -o pipefail

-
-windows_s3_link="https://ossci-windows.s3.amazonaws.com"
+# This is typically blank but for CUDA 10* it'll be set to 10
+windows_version_qualifier=""

 case ${CUDA_VERSION} in
    10.1)
-        # This is typically blank but for CUDA 10* it'll be set to 10
-        cudnn_file_name="cudnn-${CUDA_VERSION}-windows10-x64-v7.6.4.38"
+        archive_version="v7.6.4.38"
+        windows_version_qualifier="10"
        ;;
    10.2)
-        cudnn_file_name="cudnn-${CUDA_VERSION}-windows10-x64-v7.6.5.32"
+        archive_version="v7.6.5.32"
+        windows_version_qualifier="10"
        ;;
    11.1)
-        cudnn_file_name="cudnn-${CUDA_VERSION}-windows-x64-v8.0.5.39"
+        archive_version="v8.0.5.39"
        ;;
    11.3)
-        cudnn_file_name="cudnn-${CUDA_VERSION}-windows-x64-v8.2.0.53"
-        ;;
-    11.5)
-        # Since cuDNN 8.3 the filename has changed
-        cudnn_file_name="cudnn-windows-x86_64-8.3.2.44_cuda${CUDA_VERSION}-archive"
+        archive_version="v8.2.0.53"
        ;;
    *)
        echo "CUDA_VERSION: ${CUDA_VERSION} not supported yet"
@ -29,7 +26,7 @@ case ${CUDA_VERSION} in
 esac

 cudnn_installer_name="cudnn_installer.zip"
-cudnn_installer_link="${windows_s3_link}/${cudnn_file_name}.zip"
+cudnn_installer_link="https://ossci-windows.s3.amazonaws.com/cudnn-${CUDA_VERSION}-windows${windows_version_qualifier}-x64-${archive_version}.zip"
 cudnn_install_folder="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/"

 if [[ -f "${cudnn_install_folder}/include/cudnn.h" ]]; then
@ -44,11 +41,6 @@ else
        # Remove all of the directories before attempting to copy files
        rm -rf "${cudnn_install_folder:?}/*"
        cp -rf cudnn/cuda/* "${cudnn_install_folder}"
-
-        #Make sure windows path contains zlib dll
-        curl -k -L "${windows_s3_link}/zlib123dllx64.zip" --output "${tmp_dir}\zlib123dllx64.zip"
-        7z x "${tmp_dir}\zlib123dllx64.zip" -o"${tmp_dir}\zlib"
-        xcopy /Y "${tmp_dir}\zlib\dll_x64\*.dll" "C:\Windows\System32"
    )
    rm -rf "${tmp_dir}"
 fi
--- a/.circleci/verbatim-sources/build-parameters/binary-build-params.yml
+++ b/.circleci/verbatim-sources/build-parameters/binary-build-params.yml
@ -62,4 +62,5 @@ binary_windows_params: &binary_windows_params
      default: "windows-xlarge-cpu-with-nvidia-cuda"
  environment:
    BUILD_ENVIRONMENT: << parameters.build_environment >>
+    BUILD_FOR_SYSTEM: windows
    JOB_EXECUTOR: <<parameters.executor>>
--- a/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
+++ b/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
@ -26,6 +26,24 @@ pytorch_params: &pytorch_params
    CI_MASTER: << pipeline.parameters.run_master_build >>
  resource_class: << parameters.resource_class >>

+pytorch_android_params: &pytorch_android_params
+  parameters:
+    build_environment:
+      type: string
+      default: ""
+    op_list:
+      type: string
+      default: ""
+    lite_interpreter:
+      type: string
+      default: "1"
+  environment:
+    BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single
+    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
+    PYTHON_VERSION: "3.6"
+    SELECTED_OP_LIST: << parameters.op_list >>
+    BUILD_LITE_INTERPRETER: << parameters.lite_interpreter >>
+
 pytorch_ios_params: &pytorch_ios_params
  parameters:
    build_environment:
--- a/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
@ -161,7 +161,6 @@
    <<: *binary_mac_params
    macos:
      xcode: "12.0"
-      resource_class: "large"
    steps:
    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
    - checkout
--- a/.circleci/verbatim-sources/job-specs/docker_jobs.yml
+++ b/.circleci/verbatim-sources/job-specs/docker_jobs.yml
@ -54,3 +54,61 @@
              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
              set -x
              cd .circleci/docker && ./build_docker.sh
+  docker_for_ecr_gc_build_job:
+      machine:
+        image: ubuntu-2004:202104-01
+      steps:
+        - checkout
+        - run:
+            name: build_docker_image_for_ecr_gc
+            no_output_timeout: "1h"
+            command: |
+              cd .circleci/ecr_gc_docker
+              docker build . -t 308535385114.dkr.ecr.us-east-1.amazonaws.com/gc/ecr
+              set +x
+              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
+              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
+              export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+              export AWS_REGION=us-east-1
+              aws ecr get-login-password --region $AWS_REGION|docker login --username AWS \
+                       --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
+              set -x
+              docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/gc/ecr
+  ecr_gc_job:
+      parameters:
+        project:
+          type: string
+          default: "pytorch"
+        tags_to_keep:  # comma-separated values
+          type: string
+      environment:
+        PROJECT: << parameters.project >>
+        # TODO: Remove legacy image tags once we feel comfortable with new docker image tags
+        IMAGE_TAG: << parameters.tags_to_keep >>
+      docker:
+        - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/gc/ecr
+          aws_auth:
+            aws_access_key_id: ${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
+            aws_secret_access_key: ${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
+
+      steps:
+        - checkout
+        - run:
+            # NOTE: see 'docker_build_job' for how these tags actually get built
+            name: dynamically generate tags to keep
+            no_output_timeout: "1h"
+            command: |
+              GENERATED_IMAGE_TAG=$(\
+                git log --oneline --pretty='%H' .circleci/docker \
+                  | xargs -I '{}' git rev-parse '{}:.circleci/docker' \
+                  | paste -sd "," -)
+              echo "export GENERATED_IMAGE_TAG='${GENERATED_IMAGE_TAG}'" >> ${BASH_ENV}
+        - run:
+            name: garbage collecting for ecr images
+            no_output_timeout: "1h"
+            command: |
+              set +x
+              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
+              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
+              set -x
+              /usr/bin/gc.py --filter-prefix ${PROJECT}  --ignore-tags "${IMAGE_TAG},${GENERATED_IMAGE_TAG}"
--- a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
@ -27,7 +27,7 @@
  pytorch_python_doc_build:
    environment:
      BUILD_ENVIRONMENT: pytorch-python-doc-push
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4"
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -43,8 +43,8 @@
          set -ex
          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
-          # turn v1.12.0rc3 into 1.12
-          tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9]*\.[0-9]*\).*/\1/')
+          # turn v1.12.0rc3 into 1.12.0
+          tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9.]*\).*/\1/')
          target=${tag:-master}
          echo "building for ${target}"
          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
@ -73,7 +73,7 @@
  pytorch_cpp_doc_build:
    environment:
      BUILD_ENVIRONMENT: pytorch-cpp-doc-push
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4"
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -89,8 +89,9 @@
          set -ex
          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
-          # turn v1.12.0rc3 into 1.12
-          tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9]*\.[0-9]*\).*/\1/')
+          # turn v1.12.0rc3 into 1.12.0
+          tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9.]*\).*/\1/')
+          tag=${CIRCLE_TAG:1:5}
          target=${tag:-master}
          echo "building for ${target}"
          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
@ -212,7 +213,7 @@
          command: |
            set -ex
            source /Users/distiller/workspace/miniconda3/bin/activate
-            python3 -m pip install boto3==1.19.12
+            pip install boto3

            export IN_CI=1
            export JOB_BASE_NAME=$CIRCLE_JOB
@ -252,7 +253,7 @@
    environment:
      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
+      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -341,7 +342,7 @@
    environment:
      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-publish-snapshot
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
+      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -377,7 +378,7 @@
    environment:
      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-only-x86_32
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
+      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -415,6 +416,43 @@
        path: ~/workspace/build_android_x86_32_artifacts/artifacts.tgz
        destination: artifacts.tgz

+  pytorch_android_gradle_custom_build_single:
+    <<: *pytorch_android_params
+    resource_class: large
+    machine:
+      image: ubuntu-2004:202104-01
+    steps:
+    - checkout
+    - calculate_docker_image_tag
+    - setup_linux_system_environment
+    - checkout
+    - calculate_docker_image_tag
+    - setup_ci_environment
+    - run:
+        name: pytorch android gradle custom build single architecture (for PR)
+        no_output_timeout: "1h"
+        command: |
+          set -e
+          # Unlike other gradle jobs, it's not worth building libtorch in a separate CI job and sharing it via docker, because:
+          # 1) Not shareable: it's custom selective build, which is different from default libtorch mobile build;
+          # 2) Not parallelizable by architecture: it only builds libtorch for one architecture;
+
+          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}:${DOCKER_TAG}"
+          time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
+
+          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
+          VOLUME_MOUNTS="-v /home/circleci/project/:/var/lib/jenkins/workspace"
+          export id=$(docker run --env-file "${BASH_ENV}" ${VOLUME_MOUNTS} --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
+
+          export COMMAND='((echo "export GRADLE_OFFLINE=1" && echo "export BUILD_LITE_INTERPRETER=${BUILD_LITE_INTERPRETER}" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+
+          # Skip docker push as this job is purely for size analysis purposes.
+          # Result binaries are already in `/home/circleci/project/` as it's mounted instead of copied.
+
+    - upload_binary_size_for_android_build:
+        build_type: custom-build-single
+
  pytorch_ios_build:
    <<: *pytorch_ios_params
    macos:
@ -483,7 +521,6 @@
            echo "IOS_PLATFORM: ${IOS_PLATFORM}"
            echo "USE_PYTORCH_METAL": "${USE_METAL}"
            echo "BUILD_LITE_INTERPRETER": "${BUILD_LITE_INTERPRETER}"
-            echo "USE_COREML_DELEGATE": "${USE_COREML_DELEGATE}"

            #check the custom build flag
            echo "SELECTED_OP_LIST: ${SELECTED_OP_LIST}"
@ -492,7 +529,6 @@
            fi
            export IOS_ARCH=${IOS_ARCH}
            export IOS_PLATFORM=${IOS_PLATFORM}
-            export USE_COREML_DELEGATE=${USE_COREML_DELEGATE}
            if [ ${IOS_PLATFORM} != "SIMULATOR" ]; then
              export USE_PYTORCH_METAL=${USE_METAL}
            fi
@ -532,32 +568,20 @@
            PROJ_ROOT=/Users/distiller/project
            source ~/anaconda/bin/activate
            # use the pytorch nightly build to generate models
-            pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+            conda install pytorch torchvision -c pytorch-nightly --yes
            # generate models for different backends
            cd ${PROJ_ROOT}/ios/TestApp/benchmark
            mkdir -p ../models
-            if [ ${USE_COREML_DELEGATE} == 1 ]; then
-              pip install coremltools==5.0b5
-              pip install six
-              python coreml_backend.py
-            else
-              python trace_model.py
-            fi
+            python trace_model.py
            if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
-              echo "Setting up the TestApp for LiteInterpreter"
              ruby setup.rb --lite 1
            else
-              echo "Setting up the TestApp for Full JIT"
              ruby setup.rb
            fi
            cd ${PROJ_ROOT}/ios/TestApp
            instruments -s -devices
            if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
-              if [ ${USE_COREML_DELEGATE} == 1 ]; then
-                fastlane scan --only_testing TestAppTests/TestAppTests/testCoreML
-              else
-                fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
-              fi
+              fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
            else
              fastlane scan --only_testing TestAppTests/TestAppTests/testFullJIT
            fi
@ -659,7 +683,7 @@
  pytorch_doc_test:
    environment:
      BUILD_ENVIRONMENT: pytorch-doc-test
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4"
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: medium
    machine:
      image: ubuntu-2004:202104-01
--- a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@ -148,10 +148,10 @@ jobs:
        command: |
          set -e
          is_vanilla_build() {
-            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-bionic-py3.7-clang9-test" ]; then
+            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-bionic-py3.6-clang9-test" ]; then
              return 0
            fi
-            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.7-gcc5.4-test" ]; then
+            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.6-gcc5.4-test" ]; then
              return 0
            fi
            return 1
@ -190,6 +190,8 @@ jobs:
          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
            echo ".jenkins/pytorch/multigpu-test.sh" >> docker_commands.sh
          elif [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
+            echo "pip install click mock tabulate networkx==2.0" >> docker_commands.sh
+            echo "pip -q install --user \"file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx\"" >> docker_commands.sh
            echo ".jenkins/caffe2/test.sh" >> docker_commands.sh
          else
            echo ".jenkins/pytorch/test.sh" >> docker_commands.sh
@ -197,6 +199,17 @@ jobs:
          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
          unbuffer bash command.sh | ts

+          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* ]]; then
+              echo "Retrieving C++ coverage report"
+              docker cp $id:/var/lib/jenkins/workspace/build/coverage.info ./test
+          fi
+          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* || ${BUILD_ENVIRONMENT} == *"onnx"* ]]; then
+              echo "Retrieving Python coverage report"
+              docker cp $id:/var/lib/jenkins/workspace/test/.coverage ./test
+              docker cp $id:/var/lib/jenkins/workspace/test/coverage.xml ./test
+              python3 -mpip install codecov
+              python3 -mcodecov
+          fi
    - run:
        name: Report results
        no_output_timeout: "5m"
@ -227,3 +240,161 @@ jobs:
        when: always
    - store_test_results:
        path: test-reports
+    - store_artifacts:
+        path: test/.coverage
+    - store_artifacts:
+        path: test/coverage.xml
+
+  pytorch_windows_build:
+    <<: *pytorch_windows_params
+    parameters:
+      executor:
+        type: string
+        default: "windows-xlarge-cpu-with-nvidia-cuda"
+      build_environment:
+        type: string
+        default: ""
+      test_name:
+        type: string
+        default: ""
+      cuda_version:
+        type: string
+        default: "10.1"
+      python_version:
+        type: string
+        default: "3.8"
+      vs_version:
+        type: string
+        default: "16.8.6"
+      vc_version:
+        type: string
+        default: "14.16"
+      vc_year:
+        type: string
+        default: "2019"
+      vc_product:
+        type: string
+        default: "BuildTools"
+      use_cuda:
+        type: string
+        default: ""
+    executor: <<parameters.executor>>
+    steps:
+      - checkout
+      - run:
+          name: Install VS2019 toolchain
+          no_output_timeout: 10m
+          command: |
+              powershell .circleci/scripts/vs_install.ps1
+      - run:
+          name: Install Cuda
+          no_output_timeout: 30m
+          command: |
+            if [[ "${USE_CUDA}" == "1" ]]; then
+              .circleci/scripts/windows_cuda_install.sh
+            fi
+      - run:
+          name: Install Cudnn
+          command : |
+            if [[ "${USE_CUDA}" == "1" ]]; then
+              .circleci/scripts/windows_cudnn_install.sh
+            fi
+      - run:
+          name: Build
+          no_output_timeout: "90m"
+          command: |
+            set -e
+            set +x
+            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
+            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
+            set -x
+            .jenkins/pytorch/win-build.sh
+      - persist_to_workspace:
+          root: "C:/w"
+          paths: build-results
+      - store_artifacts:
+          path: C:/w/build-results
+
+  pytorch_windows_test:
+    <<: *pytorch_windows_params
+    parameters:
+      executor:
+        type: string
+        default: "windows-medium-cpu-with-nvidia-cuda"
+      build_environment:
+        type: string
+        default: ""
+      test_name:
+        type: string
+        default: ""
+      cuda_version:
+        type: string
+        default: "10.1"
+      python_version:
+        type: string
+        default: "3.8"
+      vs_version:
+        type: string
+        default: "16.8.6"
+      vc_version:
+        type: string
+        default: "14.16"
+      vc_year:
+        type: string
+        default: "2019"
+      vc_product:
+        type: string
+        default: "BuildTools"
+      use_cuda:
+        type: string
+        default: ""
+    executor: <<parameters.executor>>
+    steps:
+      - checkout
+      - attach_workspace:
+          at: c:/users/circleci/workspace
+      - run:
+          name: Install VS2019 toolchain
+          no_output_timeout: 10m
+          command: |
+              powershell .circleci/scripts/vs_install.ps1
+      - run:
+          name: Install Cuda
+          no_output_timeout: 30m
+          command: |
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              if [[ "${CUDA_VERSION}" != "10" || "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
+                .circleci/scripts/windows_cuda_install.sh
+              fi
+            fi
+      - run:
+          name: Install Cudnn
+          command : |
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              .circleci/scripts/windows_cudnn_install.sh
+            fi
+      - run:
+          name: Test
+          no_output_timeout: "30m"
+          command: |
+            set -e
+            export IN_CI=1
+            set +x
+            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
+            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
+            set -x
+            .jenkins/pytorch/win-test.sh
+      - run:
+          name: Report results
+          no_output_timeout: "5m"
+          command: |
+            set -ex
+            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
+            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
+            pip install typing_extensions boto3
+            python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+          when: always
+      - store_test_results:
+          path: test/test-reports
+      - store_artifacts:
+          path: test/coverage.xml
--- a/.circleci/verbatim-sources/nightly-binary-build-defaults.yml
+++ b/.circleci/verbatim-sources/nightly-binary-build-defaults.yml
@ -26,7 +26,6 @@
 # (smoke tests and upload jobs do not need the pytorch repo).
 binary_checkout: &binary_checkout
  name: Checkout pytorch/builder repo
-  no_output_timeout: "30m"
  command: .circleci/scripts/binary_checkout.sh

 # Parses circleci arguments in a consistent way, essentially routing to the
--- a/.circleci/verbatim-sources/workflows/workflows-ecr-gc.yml
+++ b/.circleci/verbatim-sources/workflows/workflows-ecr-gc.yml
@ -0,0 +1,34 @@
+  ecr_gc:
+    triggers:
+      - schedule:
+          cron: "45 * * * *"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_pytorch
+            project: pytorch
+            tags_to_keep: "271,262,256,278,282,291,300,323,327,347,389,401,402,403,405,a8006f9a-272d-4478-b137-d121c6f05c83,6e7b11da-a919-49e5-b2ba-da66e3d4bb0a,f990c76a-a798-42bb-852f-5be5006f8026,e43973a9-9d5a-4138-9181-a08a0fc55e2f,8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59,9a3986fa-7ce7-4a36-a001-3c9bef9892e2,1bc00f11-e0f3-4e5c-859f-15937dd938cd,209062ef-ab58-422a-b295-36c4eed6e906,be76e8fd-44e2-484d-b090-07e0cc3a56f0,fff7795428560442086f7b2bb6004b65245dc11a,ab1632df-fa59-40e6-8c23-98e004f61148"
+            requires:
+              - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_caffe2
+            project: caffe2
+            tags_to_keep: "376,373,369,348,345,336,325,324,315,306,301,287,283,276,273,266,253,248,238,230,213"
+            requires:
+              - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_translate
+            project: translate
+            tags_to_keep: "8"
+            requires:
+              - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_tensorcomp
+            project: tensorcomp
+            tags_to_keep: "34"
+            requires:
+              - docker_for_ecr_gc_build_job
--- a/.circleci/verbatim-sources/workflows/workflows-scheduled-ci.yml
+++ b/.circleci/verbatim-sources/workflows/workflows-scheduled-ci.yml
@ -0,0 +1,37 @@
+  # the following clones pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7's tests but enables
+  # slow tests and sets an environment variable so gradcheck runs with fast_mode=False
+  slow-gradcheck-scheduled-ci:
+    triggers:
+      - schedule:
+          # runs every 8 hours on the 45th minute
+          cron: "45 0,8,16 * * *"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - docker_build_job:
+          name: "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          image_name: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+      - pytorch_linux_build:
+          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
+          requires:
+            - "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+      - pytorch_linux_test:
+          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_old_gradcheck_test1
+          requires:
+            - periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
+          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-old-gradcheck-test1"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          use_cuda_docker_runtime: "1"
+          resource_class: gpu.medium
+      - pytorch_linux_test:
+          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_old_gradcheck_test2
+          requires:
+            - periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
+          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-old-gradcheck-test2"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          use_cuda_docker_runtime: "1"
+          resource_class: gpu.medium
--- a/.clang-tidy
+++ b/.clang-tidy
@ -33,12 +33,11 @@ modernize-*,
 -modernize-use-default-member-init,
 -modernize-use-using,
 -modernize-use-trailing-return-type,
-modernize-use-nodiscard,
 performance-*,
 -performance-noexcept-move-constructor,
 -performance-unnecessary-value-param,
 '
-HeaderFilterRegex: 'torch/csrc/(?!deploy/interpreter/cpython).*'
+HeaderFilterRegex: 'torch/csrc/.*'
 AnalyzeTemporaryDtors: false
 WarningsAsErrors: '*'
 CheckOptions:
--- a/.flake8
+++ b/.flake8
@ -16,6 +16,7 @@ per-file-ignores = __init__.py: F401 torch/utils/cpp_extension.py: B950
 optional-ascii-coding = True
 exclude =
    ./.git,
+    ./build_code_analyzer,
    ./build_test_custom_build,
    ./build,
    ./caffe2,
--- a/.github/ISSUE_TEMPLATE/bug-report.md
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@ -0,0 +1,49 @@
+---
+name: "\U0001F41B Bug Report"
+about: Submit a bug report to help us improve PyTorch
+
+---
+
+## 🐛 Bug
+
+<!-- A clear and concise description of what the bug is. -->
+
+## To Reproduce
+
+Steps to reproduce the behavior:
+
+1.
+1.
+1.
+
+<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
+
+## Expected behavior
+
+<!-- A clear and concise description of what you expected to happen. -->
+
+## Environment
+
+Please copy and paste the output from our
+[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py)
+(or fill out the checklist below manually).
+
+You can get the script and run it with:
+```
+wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
+# For security purposes, please check the contents of collect_env.py before running it.
+python collect_env.py
+```
+
+ - PyTorch Version (e.g., 1.0):
+ - OS (e.g., Linux):
+ - How you installed PyTorch (`conda`, `pip`, source):
+ - Build command you used (if compiling from source):
+ - Python version:
+ - CUDA/cuDNN version:
+ - GPU models and configuration:
+ - Any other relevant information:
+
+## Additional context
+
+<!-- Add any other context about the problem here. -->
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@ -1,56 +0,0 @@
-name: 🐛 Bug Report
-description: Create a report to help us reproduce and fix the bug
-
-body:
- type: markdown
-  attributes:
-    value: >
-      #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/pytorch/pytorch/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
-  attributes:
-    label: 🐛 Describe the bug
-    description: |
-      Please provide a clear and concise description of what the bug is.
-
-      If relevant, add a minimal example so that we can reproduce the error by running the code. It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example:
-
-      ```python
-      # All necessary imports at the beginning
-      import torch
-
-      # A succinct reproducing example trimmed down to the essential parts:
-      t = torch.rand(5, 10)  # Note: the bug is here, we should pass requires_grad=True
-      t.sum().backward()
-      ```
-
-      If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com.
-
-      Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````.
-    placeholder: |
-      A clear and concise description of what the bug is.
-
-      ```python
-      # Sample code to reproduce the problem
-      ```
-
-      ```
-      The error message you got, with the full traceback.
-      ```
-  validations:
-    required: true
- type: textarea
-  attributes:
-    label: Versions
-    description: |
-      Please run the following and paste the output below.
-      ```sh
-      wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
-      # For security purposes, please check the contents of collect_env.py before running it.
-      python collect_env.py
-      ```
-  validations:
-    required: true
- type: markdown
-  attributes:
-    value: >
-      Thanks for contributing 🎉!
--- a/.github/ISSUE_TEMPLATE/ci-sev.md
+++ b/.github/ISSUE_TEMPLATE/ci-sev.md
@ -1,39 +0,0 @@
---
-name: "⚠️CI SEV"
-about: Tracking incidents for PyTorch's CI infra.
---
-
-> NOTE: Remember to label this issue with "`ci: sev`"
-
-## Current Status
-*Status could be: preemptive, ongoing, mitigated, closed. Also tell people if they need to take action to fix it (i.e. rebase)*.
-
-## Error looks like
-*Provide some way users can tell that this SEV is causing their issue.*
-
-## Incident timeline (all times pacific)
-*Include when the incident began, when it was detected, mitigated, root caused, and finally closed.*
-
-<details>
-<summary> Click for example </summary>
-
-e.g.
- 10/30 7:27a incident began
- 10/30 8:30a detected by <method>
- 10/30 9:00 pm root caused as…
- 10/30 9:10 pm mitigated by…
- 10/31 10: am closed by…
-
-</details>
-
-## User impact
-*How does this affect users of PyTorch CI?*
-
-## Root cause
-*What was the root cause of this issue?*
-
-## Mitigation
-*How did we mitigate the issue?*
-
-## Prevention/followups
-*How do we prevent issues like this in the future?*
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@ -1,5 +0,0 @@
-blank_issues_enabled: true
-contact_links:
-  - name: Questions
-    url: https://discuss.pytorch.org/
-    about: Ask questions and discuss with other pytorch community members
--- a/.github/ISSUE_TEMPLATE/documentation.md
+++ b/.github/ISSUE_TEMPLATE/documentation.md
@ -0,0 +1,9 @@
+---
+name: "\U0001F4DA Documentation"
+about: Report an issue related to https://pytorch.org/docs
+
+---
+
+## 📚 Documentation
+
+<!-- A clear and concise description of what content in https://pytorch.org/docs is an issue. If this has to do with the general https://pytorch.org website, please file an issue at https://github.com/pytorch/pytorch.github.io/issues/new/choose instead. If this has to do with https://pytorch.org/tutorials, please file an issue at https://github.com/pytorch/tutorials/issues/new -->
--- a/.github/ISSUE_TEMPLATE/documentation.yml
+++ b/.github/ISSUE_TEMPLATE/documentation.yml
@ -1,20 +0,0 @@
-name: 📚 Documentation
-description: Report an issue related to https://pytorch.org/docs/stable/index.html
-
-body:
- type: textarea
-  attributes:
-    label: 📚 The doc issue
-    description: >
-      A clear and concise description of what content in https://pytorch.org/docs/stable/index.html is an issue. If this has to do with the general https://pytorch.org website, please file an issue at https://github.com/pytorch/pytorch.github.io/issues/new/choose instead. If this has to do with https://pytorch.org/tutorials, please file an issue at https://github.com/pytorch/tutorials/issues/new.
-  validations:
-    required: true
- type: textarea
-  attributes:
-    label: Suggest a potential alternative/fix
-    description: >
-      Tell us how we could improve the documentation in this regard.
- type: markdown
-  attributes:
-    value: >
-      Thanks for contributing 🎉!
--- a/.github/ISSUE_TEMPLATE/feature-request.md
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@ -0,0 +1,24 @@
+---
+name: "\U0001F680 Feature Request"
+about: Submit a proposal/request for a new PyTorch feature
+
+---
+
+## 🚀 Feature
+<!-- A clear and concise description of the feature proposal -->
+
+## Motivation
+
+<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
+
+## Pitch
+
+<!-- A clear and concise description of what you want to happen. -->
+
+## Alternatives
+
+<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
+
+## Additional context
+
+<!-- Add any other context or screenshots about the feature request here. -->
--- a/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@ -1,25 +0,0 @@
-name: 🚀 Feature request
-description: Submit a proposal/request for a new pytorch feature
-
-body:
- type: textarea
-  attributes:
-    label: 🚀 The feature, motivation and pitch
-    description: >
-      A clear and concise description of the feature proposal. Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too.
-  validations:
-    required: true
- type: textarea
-  attributes:
-    label: Alternatives
-    description: >
-      A description of any alternative solutions or features you've considered, if any.
- type: textarea
-  attributes:
-    label: Additional context
-    description: >
-      Add any other context or screenshots about the feature request.
- type: markdown
-  attributes:
-    value: >
-      Thanks for contributing 🎉!
--- a/.github/ISSUE_TEMPLATE/questions-help-support.md
+++ b/.github/ISSUE_TEMPLATE/questions-help-support.md
@ -0,0 +1,13 @@
+---
+name: "❓Questions/Help/Support"
+about: Do you need support? We have resources.
+
+---
+
+## ❓ Questions and Help
+
+### Please note that this issue tracker is not a help form and this issue will be closed.
+
+We have a set of [listed resources available on the website](https://pytorch.org/resources). Our primary means of support is our discussion forum:
+
+- [Discussion Forum](https://discuss.pytorch.org/)
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -1 +1 @@
-Fixes #ISSUE_NUMBER
+Fixes #{issue number}
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -1,9 +1,6 @@
 self-hosted-runner:
  labels:
-    - linux.large
    - linux.2xlarge
-    - linux.4xlarge
-    - linux.4xlarge.nvidia.gpu
    - linux.8xlarge.nvidia.gpu
    - linux.16xlarge.nvidia.gpu
    - windows.4xlarge
--- a/.github/generated-ciflow-ruleset.json
+++ b/.github/generated-ciflow-ruleset.json
@ -2,288 +2,100 @@
  "__comment": "@generated DO NOT EDIT MANUALLY, Generation script: .github/scripts/generate_ci_workflows.py",
  "label_rules": {
    "ciflow/all": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "docker-builds",
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit",
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.7-clang9",
-      "linux-bionic-rocm4.5-py3.7",
-      "linux-docs",
-      "linux-docs-push",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "macos-10-15-py3-arm64",
-      "macos-10-15-py3-lite-interpreter-x86-64",
-      "macos-11-py3-x86-64",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-cuda10.2-py3.6-gcc7",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
+      "parallelnative-linux-xenial-py3.6-gcc5.4",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
-      "pytorch-xla-linux-bionic-py3.7-clang8",
+      "puretorch-linux-xenial-py3.6-gcc5.4",
      "win-vs2019-cpu-py3",
+      "win-vs2019-cuda10.2-py3",
      "win-vs2019-cuda11.3-py3"
    ],
-    "ciflow/android": [
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit"
-    ],
    "ciflow/bazel": [
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test"
+      "linux-xenial-py3.6-gcc7-bazel-test"
    ],
-    "ciflow/binaries": [
-      "linux-binary-conda",
-      "linux-binary-libtorch-cxx11-abi",
-      "linux-binary-libtorch-pre-cxx11",
-      "linux-binary-manywheel",
-      "windows-binary-libtorch-debug",
-      "windows-binary-libtorch-release",
-      "windows-binary-wheel"
-    ],
-    "ciflow/binaries_conda": [
-      "linux-binary-conda"
-    ],
-    "ciflow/binaries_libtorch": [
-      "linux-binary-libtorch-cxx11-abi",
-      "linux-binary-libtorch-pre-cxx11",
-      "windows-binary-libtorch-debug",
-      "windows-binary-libtorch-release"
-    ],
-    "ciflow/binaries_wheel": [
-      "linux-binary-manywheel",
-      "windows-binary-wheel"
+    "ciflow/coverage": [
+      "linux-bionic-py3.8-gcc9-coverage"
    ],
    "ciflow/cpu": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "linux-bionic-py3.7-clang9",
-      "linux-docs",
-      "linux-docs-push",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
-      "pytorch-xla-linux-bionic-py3.7-clang8",
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
+      "parallelnative-linux-xenial-py3.6-gcc5.4",
+      "puretorch-linux-xenial-py3.6-gcc5.4",
      "win-vs2019-cpu-py3"
    ],
    "ciflow/cuda": [
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
+      "linux-xenial-cuda10.2-py3.6-gcc7",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3",
+      "win-vs2019-cuda10.2-py3",
      "win-vs2019-cuda11.3-py3"
    ],
    "ciflow/default": [
-      "linux-binary-conda",
-      "linux-binary-libtorch-cxx11-abi",
-      "linux-binary-libtorch-pre-cxx11",
-      "linux-binary-manywheel",
-      "linux-bionic-py3.7-clang9",
-      "linux-bionic-rocm4.5-py3.7",
-      "linux-docs",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
      "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3",
-      "windows-binary-libtorch-debug",
-      "windows-binary-libtorch-release",
-      "windows-binary-wheel"
-    ],
-    "ciflow/docs": [
-      "linux-docs"
-    ],
-    "ciflow/ios": [
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit"
+      "win-vs2019-cuda11.3-py3"
    ],
    "ciflow/libtorch": [
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7"
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7"
    ],
    "ciflow/linux": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.7-clang9",
-      "linux-bionic-rocm4.5-py3.7",
-      "linux-docs",
-      "linux-docs-push",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
-      "pytorch-xla-linux-bionic-py3.7-clang8"
-    ],
-    "ciflow/macos": [
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit",
-      "macos-10-15-py3-arm64",
-      "macos-10-15-py3-lite-interpreter-x86-64",
-      "macos-11-py3-x86-64"
-    ],
-    "ciflow/mobile": [
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static"
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-cuda10.2-py3.6-gcc7",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
+      "parallelnative-linux-xenial-py3.6-gcc5.4",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
+      "puretorch-linux-xenial-py3.6-gcc5.4"
    ],
    "ciflow/noarch": [
-      "linux-bionic-py3.7-clang9"
-    ],
-    "ciflow/onnx": [
-      "linux-xenial-py3.7-clang7-onnx"
-    ],
-    "ciflow/rocm": [
-      "linux-bionic-rocm4.5-py3.7"
-    ],
-    "ciflow/sanitizers": [
-      "linux-xenial-py3.7-clang7-asan"
+      "linux-bionic-py3.6-clang9"
    ],
    "ciflow/scheduled": [
-      "linux-docs-push",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3"
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-win-vs2019-cuda11.1-py3"
    ],
    "ciflow/slow": [
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck"
-    ],
-    "ciflow/slow-gradcheck": [
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck"
-    ],
-    "ciflow/trunk": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "docker-builds",
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit",
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.7-clang9",
-      "linux-bionic-rocm4.5-py3.7",
-      "linux-docs",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "macos-10-15-py3-arm64",
-      "macos-10-15-py3-lite-interpreter-x86-64",
-      "macos-11-py3-x86-64",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
-      "pytorch-xla-linux-bionic-py3.7-clang8",
-      "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3"
-    ],
-    "ciflow/vulkan": [
-      "linux-vulkan-bionic-py3.7-clang9"
+      "linux-xenial-cuda10.2-py3.6-gcc7"
    ],
    "ciflow/win": [
      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3",
      "win-vs2019-cpu-py3",
+      "win-vs2019-cuda10.2-py3",
      "win-vs2019-cuda11.3-py3"
    ],
    "ciflow/xla": [
-      "pytorch-xla-linux-bionic-py3.7-clang8"
+      "linux-bionic-py3.6-clang9"
    ]
  },
  "version": "v1"
--- a/.github/merge_rules.json
+++ b/.github/merge_rules.json
@ -1,20 +0,0 @@
-[
-   {
-    "name": "ONNX exporter",
-    "patterns": ["torch/onnx/**", "torch/csrc/jit/passes/onnx/**", "torch/csrc/jit/passes/onnx.*", "test/onnx/**", "docs/source/onnx.rst"],
-    "approved_by": ["BowenBao", "garymm"],
-    "mandatory_app_id": 12274
-   },
-   {
-    "name": "NVFuser",
-    "patterns": ["torch/csrc/jit/codegen/fuser/cuda/**", "torch/csrc/jit/codegen/cuda/**", "benchmarks/cpp/nvfuser/**"],
-    "approved_by": ["csarofeen", "ngimel"],
-    "mandatory_app_id": 12274
-   },
-   {
-    "name": "OSS CI",
-    "patterns": [".github/**", ".circleci/**", ".jenkins/**", "scripts/**"],
-    "approved_by": ["seemethere", "malfet", "suo"],
-    "mandatory_app_id": 12274
-   }
-]
--- a/.github/scale-config.yml
+++ b/.github/scale-config.yml
@ -5,9 +5,6 @@
 #
 # NOTE (Apr, 5, 2021): Linux runners are currently all an amazonlinux2
 #
-# NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the amount of CreateInstaces calls
-#                     to avoid RequestLimitExceeded issues
-#
 # TODO: Add some documentation on how the auto-scaling works
 #
 # NOTE: Default values,
@ -18,45 +15,23 @@
 #     os: linux
 #     max_available: 20
 #     disk_size: 50
-#     is_ephemeral: true

 runner_types:
-  # mainly used for ciflow-should-run, not made to run any serious tests
-  linux.large:
-    instance_type: c5.large
-    os: linux
-    disk_size: 10
-    is_ephemeral: false
  linux.2xlarge:
    instance_type: c5.2xlarge
    os: linux
    max_available: 500
    disk_size: 150
-    is_ephemeral: false
-  linux.4xlarge: # for binary-builds
-    instance_type: c5.4xlarge
-    os: linux
-    max_available: 250
-    disk_size: 150
-    is_ephemeral: false
  linux.8xlarge.nvidia.gpu:
    instance_type: g3.8xlarge
    os: linux
-    max_available: 125
+    max_available: 50
    disk_size: 150
-    is_ephemeral: false
-  linux.4xlarge.nvidia.gpu:
-    instance_type: g3.4xlarge
-    os: linux
-    max_available: 125
-    disk_size: 150
-    is_ephemeral: false
  linux.16xlarge.nvidia.gpu:
    instance_type: g3.16xlarge
    os: linux
    max_available: 10
    disk_size: 150
-    is_ephemeral: false
  windows.4xlarge:
    instance_type: c5d.4xlarge
    os: windows
@ -65,5 +40,5 @@ runner_types:
  windows.8xlarge.nvidia.gpu:
    instance_type: p3.2xlarge
    os: windows
-    max_available: 50
+    max_available: 25
    disk_size: 256
--- a/.github/scripts/ensure_actions_will_cancel.py
+++ b/.github/scripts/ensure_actions_will_cancel.py
@ -46,20 +46,11 @@ if __name__ == "__main__":
            "group": concurrency_key(filename),
            "cancel-in-progress": True,
        }
-        actual = data.get("concurrency", None)
-        if actual != expected:
+        if data.get("concurrency", None) != expected:
            print(
                f"'concurrency' incorrect or not found in '{filename.relative_to(REPO_ROOT)}'",
                file=sys.stderr,
            )
-            print(
-                f"expected: {expected}",
-                file=sys.stderr,
-            )
-            print(
-                f"actual:   {actual}",
-                file=sys.stderr,
-            )
            errors_found = True

    if errors_found:
--- a/.github/scripts/export_pytorch_labels.py
+++ b/.github/scripts/export_pytorch_labels.py
@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-'''
-Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
-
-As a part of enforcing test ownership, we want to maintain a list of existing PyTorch labels
-to verify the owners' existence. This script outputs a file containing a list of existing
-pytorch/pytorch labels so that the file could be uploaded to S3.
-
-This script assumes the correct env vars are set for AWS permissions.
-
-'''
-
-import boto3  # type: ignore[import]
-import json
-from functools import lru_cache
-from typing import List, Any
-from urllib.request import urlopen, Request
-
-# Modified from https://github.com/pytorch/pytorch/blob/b00206d4737d1f1e7a442c9f8a1cadccd272a386/torch/hub.py#L129
-def _read_url(url: Any) -> Any:
-    with urlopen(url) as r:
-        return r.headers, r.read().decode(r.headers.get_content_charset('utf-8'))
-
-
-def request_for_labels(url: str) -> Any:
-    headers = {'Accept': 'application/vnd.github.v3+json'}
-    return _read_url(Request(url, headers=headers))
-
-
-def get_last_page(header: Any) -> int:
-    # Link info looks like: <https://api.github.com/repositories/65600975/labels?per_page=100&page=2>;
-    # rel="next", <https://api.github.com/repositories/65600975/labels?per_page=100&page=3>; rel="last"
-    link_info = header['link']
-    prefix = "&page="
-    suffix = ">;"
-    return int(link_info[link_info.rindex(prefix) + len(prefix):link_info.rindex(suffix)])
-
-
-def update_labels(labels: List[str], info: str) -> None:
-    labels_json = json.loads(info)
-    labels.extend([x["name"] for x in labels_json])
-
-
-@lru_cache()
-def get_pytorch_labels() -> List[str]:
-    prefix = "https://api.github.com/repos/pytorch/pytorch/labels?per_page=100"
-    header, info = request_for_labels(prefix + "&page=1")
-    labels: List[str] = []
-    update_labels(labels, info)
-
-    last_page = get_last_page(header)
-    assert last_page > 0, "Error reading header info to determine total number of pages of labels"
-    for page_number in range(2, last_page + 1):  # skip page 1
-        _, info = request_for_labels(prefix + f"&page={page_number}")
-        update_labels(labels, info)
-
-    return labels
-
-
-def send_labels_to_S3(labels: List[str]) -> None:
-    labels_file_name = "pytorch_labels.json"
-    obj = boto3.resource('s3').Object('ossci-metrics', labels_file_name)
-    obj.put(Body=json.dumps(labels).encode())
-
-
-def main() -> None:
-    send_labels_to_S3(get_pytorch_labels())
-
-
-if __name__ == '__main__':
-    main()
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@ -10,13 +10,19 @@ architectures:
    * Latest ROCM
 """

-from typing import Dict, List, Tuple
+import argparse
+import json
+from typing import Dict, List

+CUDA_ARCHES = [
+    "10.2",
+    "11.1"
+]

-CUDA_ARCHES = ["10.2", "11.1", "11.3", "11.5"]
-
-
-ROCM_ARCHES = ["4.3.1", "4.5.2"]
+ROCM_ARCHES = [
+    "3.10",
+    "4.0"
+]


 def arch_type(arch_version: str) -> str:
@ -30,168 +36,132 @@ def arch_type(arch_version: str) -> str:

 WHEEL_CONTAINER_IMAGES = {
    **{
-        gpu_arch: f"pytorch/manylinux-builder:cuda{gpu_arch}"
+        # TODO: Re-do manylinux CUDA image tagging scheme to be similar to
+        #       ROCM so we don't have to do this replacement
+        gpu_arch: f"pytorch/manylinux-cuda{gpu_arch.replace('.', '')}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
-        gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}"
+        gpu_arch: f"pytorch/manylinux-rocm:{gpu_arch}"
        for gpu_arch in ROCM_ARCHES
    },
-    "cpu": "pytorch/manylinux-builder:cpu",
+    "cpu": "pytorch/manylinux-cpu"
 }

 CONDA_CONTAINER_IMAGES = {
-    **{gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}" for gpu_arch in CUDA_ARCHES},
-    "cpu": "pytorch/conda-builder:cpu",
+    **{
+        gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}"
+        for gpu_arch in CUDA_ARCHES
+    },
+    "cpu": "pytorch/conda-builder:cpu"
 }

-PRE_CXX11_ABI = "pre-cxx11"
-CXX11_ABI = "cxx11-abi"
-RELEASE = "release"
-DEBUG = "debug"
-
-LIBTORCH_CONTAINER_IMAGES: Dict[Tuple[str, str], str] = {
+LIBTORCH_CONTAINER_IMAGES = {
    **{
-        (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:cuda{gpu_arch}"
+        # TODO: Re-do manylinux CUDA image tagging scheme to be similar to
+        #       ROCM so we don't have to do this replacement
+        (gpu_arch, "pre-cxx11"): f"pytorch/manylinux-cuda{gpu_arch.replace('.', '')}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
-        (gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}"
+        (gpu_arch, "cxx11-abi"): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}"
        for gpu_arch in CUDA_ARCHES
    },
-    ("cpu", PRE_CXX11_ABI): "pytorch/manylinux-builder:cpu",
-    ("cpu", CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu",
+    ("cpu", "pre-cxx11"): "pytorch/manylinux-cpu",
+    ("cpu", "cxx11-abi"): "pytorch/libtorch-cxx11-builder:cpu",
 }

-FULL_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
+FULL_PYTHON_VERSIONS = [
+    "3.6",
+    "3.7",
+    "3.8",
+    "3.9",
+]


-def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
-    return {
-        "cpu": "cpu",
-        "cuda": f"cu{gpu_arch_version.replace('.', '')}",
-        "rocm": f"rocm{gpu_arch_version}",
-    }.get(gpu_arch_type, gpu_arch_version)
+def is_pull_request() -> bool:
+    return False
+    # return os.environ.get("GITHUB_HEAD_REF")


-def list_without(in_list: List[str], without: List[str]) -> List[str]:
-    return [item for item in in_list if item not in without]
+def snip_if(is_pr: bool, versions: List[str]) -> List[str]:
+    """
+    Return the full list of versions, or just the latest if on a PR.
+    """
+    return [versions[-1]] if is_pr else versions


-def generate_conda_matrix(os: str) -> List[Dict[str, str]]:
-    ret: List[Dict[str, str]] = []
-    arches = ["cpu"]
-    if os == "linux":
-        arches += CUDA_ARCHES
-    elif os == "windows":
-        # We don't build CUDA 10.2 for window see https://github.com/pytorch/pytorch/issues/65648
-        arches += list_without(CUDA_ARCHES, ["10.2"])
-    for python_version in FULL_PYTHON_VERSIONS:
+def generate_conda_matrix(is_pr: bool) -> List[Dict[str, str]]:
+    return [
+        {
+            "python_version": python_version,
+            "gpu_arch_type": arch_type(arch_version),
+            "gpu_arch_version": arch_version,
+            "container_image": CONDA_CONTAINER_IMAGES[arch_version],
+        }
+        for python_version in snip_if(is_pr, FULL_PYTHON_VERSIONS)
        # We don't currently build conda packages for rocm
-        for arch_version in arches:
-            gpu_arch_type = arch_type(arch_version)
-            gpu_arch_version = "" if arch_version == "cpu" else arch_version
-            ret.append(
-                {
-                    "python_version": python_version,
-                    "gpu_arch_type": gpu_arch_type,
-                    "gpu_arch_version": gpu_arch_version,
-                    "desired_cuda": translate_desired_cuda(
-                        gpu_arch_type, gpu_arch_version
-                    ),
-                    "container_image": CONDA_CONTAINER_IMAGES[arch_version],
-                    "package_type": "conda",
-                    "build_name": f"conda-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
-                        ".", "_"
-                    ),
-                }
-            )
-    return ret
+        for arch_version in ["cpu"] + snip_if(is_pr, CUDA_ARCHES)
+    ]


-def generate_libtorch_matrix(os: str, abi_version: str) -> List[Dict[str, str]]:
+def generate_libtorch_matrix(is_pr: bool) -> List[Dict[str, str]]:
    libtorch_variants = [
        "shared-with-deps",
        "shared-without-deps",
        "static-with-deps",
        "static-without-deps",
    ]
-    ret: List[Dict[str, str]] = []
+    return [
+        {
+            "gpu_arch_type": arch_type(arch_version),
+            "gpu_arch_version": arch_version,
+            "libtorch_variant": libtorch_variant,
+            "devtoolset": abi_version,
+            "container_image": LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)],
+        }
+        # We don't currently build libtorch for rocm
+        for arch_version in ["cpu"] + snip_if(is_pr, CUDA_ARCHES)
+        for libtorch_variant in libtorch_variants
+        # one of the values in the following list must be exactly
+        # "cxx11-abi", but the precise value of the other one doesn't
+        # matter
+        for abi_version in ["cxx11-abi", "pre-cxx11"]
+    ]
+
+
+def generate_wheels_matrix(is_pr: bool) -> List[Dict[str, str]]:
    arches = ["cpu"]
-    if os == "linux":
-        arches += CUDA_ARCHES
-    elif os == "windows":
-        # We don't build CUDA 10.2 for window see https://github.com/pytorch/pytorch/issues/65648
-        arches += list_without(CUDA_ARCHES, ["10.2"])
-    for arch_version in arches:
-        for libtorch_variant in libtorch_variants:
-            # We don't currently build libtorch for rocm
-            # one of the values in the following list must be exactly
-            # CXX11_ABI, but the precise value of the other one doesn't
-            # matter
-            gpu_arch_type = arch_type(arch_version)
-            gpu_arch_version = "" if arch_version == "cpu" else arch_version
-            ret.append(
-                {
-                    "gpu_arch_type": gpu_arch_type,
-                    "gpu_arch_version": gpu_arch_version,
-                    "desired_cuda": translate_desired_cuda(
-                        gpu_arch_type, gpu_arch_version
-                    ),
-                    "libtorch_variant": libtorch_variant,
-                    "libtorch_config": abi_version if os == "windows" else "",
-                    "devtoolset": abi_version if os != "windows" else "",
-                    "container_image": LIBTORCH_CONTAINER_IMAGES[
-                        (arch_version, abi_version)
-                    ] if os != "windows" else "",
-                    "package_type": "libtorch",
-                    "build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
-                        ".", "_"
-                    ),
-                }
-            )
-    return ret
+    arches += snip_if(is_pr, CUDA_ARCHES)
+    arches += snip_if(is_pr, ROCM_ARCHES)
+    return [
+        {
+            "python_version": python_version,
+            "gpu_arch_type": arch_type(arch_version),
+            "gpu_arch_version": arch_version,
+            "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
+        }
+        for python_version in snip_if(is_pr, FULL_PYTHON_VERSIONS)
+        for arch_version in arches
+    ]


-def generate_wheels_matrix(os: str) -> List[Dict[str, str]]:
-    arches = ["cpu"]
-    package_type = "wheel"
-    if os == "linux":
-        arches += CUDA_ARCHES + ROCM_ARCHES
-        # NOTE: We only build manywheel packages for linux
-        package_type = "manywheel"
-    elif os == "windows":
-        # We don't build CUDA 10.2 for window see https://github.com/pytorch/pytorch/issues/65648
-        arches += list_without(CUDA_ARCHES, ["10.2"])
-    ret: List[Dict[str, str]] = []
-    for python_version in FULL_PYTHON_VERSIONS:
-        for arch_version in arches:
-            gpu_arch_type = arch_type(arch_version)
-            gpu_arch_version = "" if arch_version == "cpu" else arch_version
-            ret.append(
-                {
-                    "python_version": python_version,
-                    "gpu_arch_type": gpu_arch_type,
-                    "gpu_arch_version": gpu_arch_version,
-                    "desired_cuda": translate_desired_cuda(
-                        gpu_arch_type, gpu_arch_version
-                    ),
-                    "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
-                    "package_type": package_type,
-                    "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
-                        ".", "_"
-                    ),
-                }
-            )
-    return ret
+def from_includes(includes: List[Dict[str, str]]) -> str:
+    return json.dumps({"include": includes})


-def generate_binary_build_matrix(os: str) -> List[Dict[str, str]]:
-    return {
-        "linux": [
-            *generate_conda_matrix(os),
-            *generate_libtorch_matrix(os, abi_version=PRE_CXX11_ABI),
-            *generate_libtorch_matrix(os, abi_version=CXX11_ABI),
-            *generate_wheels_matrix(os),
-        ]
-    }[os]
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('mode', choices=['conda', 'libtorch', 'wheels'])
+    args = parser.parse_args()
+
+    is_pr = is_pull_request()
+    print(from_includes({
+        'conda': generate_conda_matrix,
+        'libtorch': generate_libtorch_matrix,
+        'wheels': generate_wheels_matrix,
+    }[args.mode](is_pr)))
+
+
+if __name__ == "__main__":
+    main()
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
--- a/.github/scripts/generate_pytorch_test_matrix.py
+++ b/.github/scripts/generate_pytorch_test_matrix.py
@ -15,9 +15,6 @@ from typing import Dict
 from typing_extensions import TypedDict


-BUILD_ENVIRONMENT = os.getenv('BUILD_ENVIRONMENT')
-assert BUILD_ENVIRONMENT is not None
-
 class Config(TypedDict):
    num_shards: int
    runner: str
@ -34,64 +31,28 @@ def get_disabled_issues() -> str:
    issue_numbers = [x[4] for x in re.findall(regex, pr_body)]
    return ','.join(issue_numbers)

-# When the user specifies labels that are NOT ciflow/default, the expectation is
-# that the workflows should be triggered as if they are on trunk. For example, when
-# ciflow/all is specified, we should run the full test suite for Windows CUDA
-# and NOT only the smoke tests.
-def run_as_if_on_trunk() -> bool:
-    ON_PULL_REQUEST = os.getenv('GITHUB_HEAD_REF')
-    if not ON_PULL_REQUEST:
-        return True
-
-    from pathlib import Path
-    GITHUB_DIR = Path(__file__).resolve().parent.parent
-
-    with open(f'{GITHUB_DIR}/generated-ciflow-ruleset.json') as f:
-        labels_to_workflows = json.load(f)['label_rules']
-
-    pr_labels = json.loads(os.getenv('PR_LABELS', '[]'))
-    current_workflow_triggered_by_label = False
-    for label in pr_labels:
-        if label != 'ciflow/default' and label in labels_to_workflows:
-            workflows_triggered_by_label = labels_to_workflows[label]
-            if any([BUILD_ENVIRONMENT in workflow for workflow in workflows_triggered_by_label]):
-                current_workflow_triggered_by_label = True
-                break
-
-    return current_workflow_triggered_by_label

 def main() -> None:
-    INCLUDE_DEFAULT_TEST = True
    TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')
    assert TEST_RUNNER_TYPE is not None
-    RUN_SMOKE_TESTS_ONLY_ON_PR = os.getenv('RUN_SMOKE_TESTS_ONLY_ON_PR')
-    RUN_SMOKE_TESTS = RUN_SMOKE_TESTS_ONLY_ON_PR == "true" and not run_as_if_on_trunk()
+    ON_PULL_REQUEST = os.getenv('GITHUB_HEAD_REF')
    NUM_TEST_SHARDS_ON_PULL_REQUEST = os.getenv('NUM_TEST_SHARDS_ON_PULL_REQUEST')
-    NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '0'))
-    if not run_as_if_on_trunk() and NUM_TEST_SHARDS_ON_PULL_REQUEST:
+    NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
+    if ON_PULL_REQUEST and NUM_TEST_SHARDS_ON_PULL_REQUEST:
        NUM_TEST_SHARDS = int(NUM_TEST_SHARDS_ON_PULL_REQUEST)
    MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
-    DISTRIBUTED_GPU_RUNNER_TYPE = os.getenv('DISTRIBUTED_GPU_RUNNER_TYPE', TEST_RUNNER_TYPE)
    NOGPU_RUNNER_TYPE = os.getenv('NOGPU_RUNNER_TYPE')
    configs: Dict[str, Config] = {}
    if os.getenv('ENABLE_JIT_LEGACY_TEST'):
        configs['jit_legacy'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
        configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
-    if NOGPU_RUNNER_TYPE is not None:
-        if os.getenv('ENABLE_NOGPU_NO_AVX_TEST'):
-            configs['nogpu_NO_AVX'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
-        if os.getenv('ENABLE_NOGPU_NO_AVX2_TEST'):
-            configs['nogpu_NO_AVX2'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
-        if os.getenv('ENABLE_FORCE_ON_CPU_TEST'):
-            configs['force_on_cpu'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
+    if NOGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_NOGPU_NO_AVX_TEST'):
+        configs['nogpu_NO_AVX'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
+    if NOGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_NOGPU_NO_AVX2_TEST'):
+        configs['nogpu_NO_AVX2'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
    if os.getenv('ENABLE_DISTRIBUTED_TEST'):
-        configs['distributed'] = {
-            'num_shards': 1,
-            'runner': DISTRIBUTED_GPU_RUNNER_TYPE if "cuda" in str(BUILD_ENVIRONMENT) else TEST_RUNNER_TYPE
-        }
-    if os.getenv('ENABLE_FX2TRT_TEST'):
-        configs['fx2trt'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
+        configs['distributed'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if os.getenv('ENABLE_SLOW_TEST'):
        configs['slow'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if os.getenv('ENABLE_DOCS_TEST'):
@ -100,11 +61,8 @@ def main() -> None:
        configs['backwards_compat'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if os.getenv('ENABLE_XLA_TEST'):
        configs['xla'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-        INCLUDE_DEFAULT_TEST = False
    if os.getenv('ENABLE_NOARCH_TEST'):
        configs['noarch'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    if RUN_SMOKE_TESTS:
-        configs['smoke_tests'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    matrix = {
        'include': [
            {
@ -114,7 +72,6 @@ def main() -> None:
                'runner': TEST_RUNNER_TYPE,
            }
            for shard in range(1, NUM_TEST_SHARDS + 1)
-            if INCLUDE_DEFAULT_TEST
        ] + [
            {
                'config': name,
--- a/.github/scripts/gitutils.py
+++ b/.github/scripts/gitutils.py
@ -1,290 +0,0 @@
-#!/usr/bin/env python3
-
-from collections import defaultdict
-from datetime import datetime
-from typing import cast, Any, Dict, Iterator, List, Optional, Tuple, Union
-import os
-import re
-
-
-RE_GITHUB_URL_MATCH = re.compile("^https://.*@?github.com/(.+)/(.+)$")
-
-
-def get_git_remote_name() -> str:
-    return os.getenv("GIT_REMOTE_NAME", "origin")
-
-
-def get_git_repo_dir() -> str:
-    from pathlib import Path
-    return os.getenv("GIT_REPO_DIR", str(Path(__file__).resolve().parent.parent.parent))
-
-
-def fuzzy_list_to_dict(items: List[Tuple[str, str]]) -> Dict[str, List[str]]:
-    """
-    Converts list to dict preserving elements with duplicate keys
-    """
-    rc: Dict[str, List[str]] = defaultdict(lambda: [])
-    for (key, val) in items:
-        rc[key].append(val)
-    return dict(rc)
-
-
-def _check_output(items: List[str], encoding: str = "utf-8") -> str:
-    from subprocess import check_output, CalledProcessError
-    try:
-        return check_output(items).decode(encoding)
-    except CalledProcessError as e:
-        msg = f"Command `{' '.join(e.cmd)}` returned non-zero exit code {e.returncode}"
-        stdout = e.stdout.decode(encoding) if e.stdout is not None else ""
-        stderr = e.stderr.decode(encoding) if e.stderr is not None else ""
-        if len(stderr) == 0:
-            msg += f"\n{stdout}"
-        else:
-            msg += f"\nstdout:\n{stdout}\nstderr:\n{stderr}"
-        raise RuntimeError(msg) from e
-
-
-class GitCommit:
-    commit_hash: str
-    title: str
-    body: str
-    author: str
-    author_date: datetime
-    commit_date: Optional[datetime]
-
-    def __init__(self,
-                 commit_hash: str,
-                 author: str,
-                 author_date: datetime,
-                 title: str,
-                 body: str,
-                 commit_date: Optional[datetime] = None) -> None:
-        self.commit_hash = commit_hash
-        self.author = author
-        self.author_date = author_date
-        self.commit_date = commit_date
-        self.title = title
-        self.body = body
-
-    def __repr__(self) -> str:
-        return f"{self.title} ({self.commit_hash})"
-
-    def __contains__(self, item: Any) -> bool:
-        return item in self.body or item in self.title
-
-
-def parse_fuller_format(lines: Union[str, List[str]]) -> GitCommit:
-    """
-    Expect commit message generated using `--format=fuller --date=unix` format, i.e.:
-        commit <sha1>
-        Author:     <author>
-        AuthorDate: <author date>
-        Commit:     <committer>
-        CommitDate: <committer date>
-
-        <title line>
-
-        <full commit message>
-
-    """
-    if isinstance(lines, str):
-        lines = lines.split("\n")
-    # TODO: Handle merge commits correctly
-    if len(lines) > 1 and lines[1].startswith("Merge:"):
-        del lines[1]
-    assert len(lines) > 7
-    assert lines[0].startswith("commit")
-    assert lines[1].startswith("Author: ")
-    assert lines[2].startswith("AuthorDate: ")
-    assert lines[3].startswith("Commit: ")
-    assert lines[4].startswith("CommitDate: ")
-    assert len(lines[5]) == 0
-    return GitCommit(commit_hash=lines[0].split()[1].strip(),
-                     author=lines[1].split(":", 1)[1].strip(),
-                     author_date=datetime.fromtimestamp(int(lines[2].split(":", 1)[1].strip())),
-                     commit_date=datetime.fromtimestamp(int(lines[4].split(":", 1)[1].strip())),
-                     title=lines[6].strip(),
-                     body="\n".join(lines[7:]),
-                     )
-
-
-class GitRepo:
-    def __init__(self, path: str, remote: str = "origin", debug: bool = False) -> None:
-        self.repo_dir = path
-        self.remote = remote
-        self.debug = debug
-
-    def _run_git(self, *args: Any) -> str:
-        if self.debug:
-            print(f"+ git -C {self.repo_dir} {' '.join(args)}")
-        return _check_output(["git", "-C", self.repo_dir] + list(args))
-
-    def revlist(self, revision_range: str) -> List[str]:
-        rc = self._run_git("rev-list", revision_range, "--", ".").strip()
-        return rc.split("\n") if len(rc) > 0 else []
-
-    def current_branch(self) -> str:
-        return self._run_git("symbolic-ref", "--short", "HEAD").strip()
-
-    def checkout(self, branch: str) -> None:
-        self._run_git('checkout', branch)
-
-    def show_ref(self, name: str) -> str:
-        refs = self._run_git('show-ref', '-s', name).strip().split('\n')
-        if not all(refs[i] == refs[0] for i in range(1, len(refs))):
-            raise RuntimeError(f"referce {name} is ambigous")
-        return refs[0]
-
-    def rev_parse(self, name: str) -> str:
-        return self._run_git('rev-parse', '--verify', name).strip()
-
-    def get_merge_base(self, from_ref: str, to_ref: str) -> str:
-        return self._run_git('merge-base', from_ref, to_ref).strip()
-
-    def patch_id(self, ref: Union[str, List[str]]) -> List[Tuple[str, str]]:
-        is_list = isinstance(ref, list)
-        if is_list:
-            if len(ref) == 0:
-                return []
-            ref = " ".join(ref)
-        rc = _check_output(['sh', '-c', f'git -C {self.repo_dir} show {ref}|git patch-id --stable']).strip()
-        return [cast(Tuple[str, str], x.split(" ", 1)) for x in rc.split("\n")]
-
-    def commits_resolving_gh_pr(self, pr_num: int) -> List[str]:
-        owner, name = self.gh_owner_and_name()
-        msg = f"Pull Request resolved: https://github.com/{owner}/{name}/pull/{pr_num}"
-        rc = self._run_git('log', '--format=%H', '--grep', msg).strip()
-        return rc.split("\n") if len(rc) > 0 else []
-
-    def get_commit(self, ref: str) -> GitCommit:
-        return parse_fuller_format(self._run_git('show', '--format=fuller', '--date=unix', '--shortstat', ref))
-
-    def cherry_pick(self, ref: str) -> None:
-        self._run_git('cherry-pick', '-x', ref)
-
-    def revert(self, ref: str) -> None:
-        self._run_git("revert", "--no-edit", ref)
-
-    def compute_branch_diffs(self, from_branch: str, to_branch: str) -> Tuple[List[str], List[str]]:
-        """
-        Returns list of commmits that are missing in each other branch since their merge base
-        Might be slow if merge base is between two branches is pretty far off
-        """
-        from_ref = self.rev_parse(from_branch)
-        to_ref = self.rev_parse(to_branch)
-        merge_base = self.get_merge_base(from_ref, to_ref)
-        from_commits = self.revlist(f'{merge_base}..{from_ref}')
-        to_commits = self.revlist(f'{merge_base}..{to_ref}')
-        from_ids = fuzzy_list_to_dict(self.patch_id(from_commits))
-        to_ids = fuzzy_list_to_dict(self.patch_id(to_commits))
-        for patch_id in set(from_ids).intersection(set(to_ids)):
-            from_values = from_ids[patch_id]
-            to_values = to_ids[patch_id]
-            if len(from_values) != len(to_values):
-                # Eliminate duplicate commits+reverts from the list
-                while len(from_values) > 0 and len(to_values) > 0:
-                    frc = self.get_commit(from_values.pop())
-                    toc = self.get_commit(to_values.pop())
-                    if frc.title != toc.title or frc.author_date != toc.author_date:
-                        raise RuntimeError(f"Unexpected differences between {frc} and {toc}")
-                    from_commits.remove(frc.commit_hash)
-                    to_commits.remove(toc.commit_hash)
-                continue
-            for commit in from_values:
-                from_commits.remove(commit)
-            for commit in to_values:
-                to_commits.remove(commit)
-        return (from_commits, to_commits)
-
-    def cherry_pick_commits(self, from_branch: str, to_branch: str) -> None:
-        orig_branch = self.current_branch()
-        self.checkout(to_branch)
-        from_commits, to_commits = self.compute_branch_diffs(from_branch, to_branch)
-        if len(from_commits) == 0:
-            print("Nothing to do")
-            self.checkout(orig_branch)
-            return
-        for commit in reversed(from_commits):
-            print(f"Cherry picking commit {commit}")
-            self.cherry_pick(commit)
-        self.checkout(orig_branch)
-
-    def push(self, branch: str, dry_run: bool) -> None:
-        if dry_run:
-            self._run_git("push", "--dry-run", self.remote, branch)
-        else:
-            self._run_git("push", self.remote, branch)
-
-    def head_hash(self) -> str:
-        return self._run_git("show-ref", "--hash", "HEAD").strip()
-
-    def remote_url(self) -> str:
-        return self._run_git("remote", "get-url", self.remote)
-
-    def gh_owner_and_name(self) -> Tuple[str, str]:
-        url = os.getenv("GIT_REMOTE_URL", None)
-        if url is None:
-            url = self.remote_url()
-        rc = RE_GITHUB_URL_MATCH.match(url)
-        if rc is None:
-            raise RuntimeError(f"Unexpected url format {url}")
-        return cast(Tuple[str, str], rc.groups())
-
-    def commit_message(self, ref: str) -> str:
-        return self._run_git("log", "-1", "--format=%B", ref)
-
-    def amend_commit_message(self, msg: str) -> None:
-        self._run_git("commit", "--amend", "-m", msg)
-
-
-class PeekableIterator(Iterator[str]):
-    def __init__(self, val: str) -> None:
-        self._val = val
-        self._idx = -1
-
-    def peek(self) -> Optional[str]:
-        if self._idx + 1 >= len(self._val):
-            return None
-        return self._val[self._idx + 1]
-
-    def __iter__(self) -> "PeekableIterator":
-        return self
-
-    def __next__(self) -> str:
-        rc = self.peek()
-        if rc is None:
-            raise StopIteration
-        self._idx += 1
-        return rc
-
-
-def patterns_to_regex(allowed_patterns: List[str]) -> Any:
-    """
-    pattern is glob-like, i.e. the only special sequences it has are:
-      - ? - matches single character
-      - * - matches any non-folder separator characters
-      - ** - matches any characters
-      Assuming that patterns are free of braces and backslashes
-      the only character that needs to be escaped are dot and plus
-    """
-    rc = "("
-    for idx, pattern in enumerate(allowed_patterns):
-        if idx > 0:
-            rc += "|"
-        pattern_ = PeekableIterator(pattern)
-        assert not any(c in pattern for c in "{}()[]\\")
-        for c in pattern_:
-            if c == ".":
-                rc += "\\."
-            elif c == "+":
-                rc += "\\+"
-            elif c == "*":
-                if pattern_.peek() == "*":
-                    next(pattern_)
-                    rc += ".+"
-                else:
-                    rc += "[^/]+"
-            else:
-                rc += c
-    rc += ")"
-    return re.compile(rc)
--- a/.github/scripts/install_nvidia_utils_linux.sh
+++ b/.github/scripts/install_nvidia_utils_linux.sh
@ -3,7 +3,7 @@
 set -eou pipefail

 DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID) \
-DRIVER_FN="NVIDIA-Linux-x86_64-495.44.run"
+DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
 YUM_REPO_URL="https://nvidia.github.io/nvidia-docker/${DISTRIBUTION}/nvidia-docker.repo"

 install_nvidia_docker2_amzn2() {
--- a/.github/scripts/lint_test_ownership.py
+++ b/.github/scripts/lint_test_ownership.py
@ -1,88 +0,0 @@
-#!/usr/bin/env python3
-'''
-Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
-
-This lint verifies that every Python test file (file that matches test_*.py or *_test.py in the test folder)
-has valid ownership information in a comment header. Valid means:
-  - The format of the header follows the pattern "# Owner(s): ["list", "of owner", "labels"]
-  - Each owner label actually exists in PyTorch
-  - Each owner label starts with "module: " or "oncall: " or is in ACCEPTABLE_OWNER_LABELS
-
-This file is expected to run in the root directory of pytorch/pytorch.
-'''
-import boto3  # type: ignore[import]
-import botocore  # type: ignore[import]
-import fnmatch
-import json
-import sys
-from pathlib import Path
-from typing import List, Any
-
-
-# Team/owner labels usually start with "module: " or "oncall: ", but the following are acceptable exceptions
-ACCEPTABLE_OWNER_LABELS = ["NNC", "high priority"]
-GLOB_EXCEPTIONS = [
-    "**/test/run_test.py"
-]
-
-PYTORCH_ROOT = Path(__file__).resolve().parent.parent.parent
-TEST_DIR = PYTORCH_ROOT / "test"
-CURRENT_FILE_NAME = Path(__file__).resolve().relative_to(PYTORCH_ROOT)
-
-S3_RESOURCE_READ_ONLY = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
-
-
-def get_all_test_files() -> List[Path]:
-    test_files = list(TEST_DIR.glob("**/test_*.py"))
-    test_files.extend(list(TEST_DIR.glob("**/*_test.py")))
-    return [f for f in test_files if not any([fnmatch.fnmatch(str(f), g) for g in GLOB_EXCEPTIONS])]
-
-
-def get_pytorch_labels() -> Any:
-    bucket = S3_RESOURCE_READ_ONLY.Bucket("ossci-metrics")
-    summaries = bucket.objects.filter(Prefix="pytorch_labels.json")
-    for summary in summaries:
-        labels = summary.get()["Body"].read()
-    return json.loads(labels)
-
-
-# Returns a string denoting the error invalidating the label OR an empty string if nothing is wrong
-def validate_label(label: str, pytorch_labels: List[str]) -> str:
-    if label not in pytorch_labels:
-        return f"{label} is not a PyTorch label (please choose from https://github.com/pytorch/pytorch/labels)"
-    if label.startswith("module:") or label.startswith("oncall:") or label in ACCEPTABLE_OWNER_LABELS:
-        return ""
-    return f"{label} is not an acceptable owner (please update to another label or edit ACCEPTABLE_OWNERS_LABELS " \
-        "in {CURRENT_FILE_NAME}"
-
-
-# Returns a string denoting the error invalidating the file OR an empty string if nothing is wrong
-def validate_file(filename: Path, pytorch_labels: List[str]) -> str:
-    prefix = "# Owner(s): "
-    relative_name = Path(filename).relative_to(PYTORCH_ROOT)
-    with open(filename) as f:
-        for line in f.readlines():
-            if line.startswith(prefix):
-                labels = json.loads(line[len(prefix):])
-                labels_msgs = [validate_label(label, pytorch_labels) for label in labels]
-                file_msg = ", ".join([x for x in labels_msgs if x != ""])
-                return f"{relative_name}: {file_msg}" if file_msg != "" else ""
-    return f"{relative_name}: missing a comment header with ownership information."
-
-
-def main() -> None:
-    test_file_paths = get_all_test_files()
-    pytorch_labels = get_pytorch_labels()
-
-    file_msgs = [validate_file(f, pytorch_labels) for f in test_file_paths]
-    err_msg = "\n".join([x for x in file_msgs if x != ""])
-    if err_msg != "":
-        err_msg = err_msg + "\n\nIf you see files with missing ownership information above, " \
-            "please add the following line\n\n# Owner(s): [\"<owner: label>\"]\n\nto the top of each test file. " \
-            "The owner should be an existing pytorch/pytorch label."
-        print(err_msg)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
--- a/.github/scripts/process_commit.py
+++ b/.github/scripts/process_commit.py
@ -1,106 +0,0 @@
-#!/usr/bin/env python3
-"""
-This script finds the user/pr creator responsible for labeling a PR by a commit SHA. It is used by the workflow in
-'.github/workflows/pr-labels.yml'. If there exists no PR associated with the commit or the PR is properly labeled,
-this script is a no-op.
-
-Note: we ping the user only, not the reviewers, as the reviewers can sometimes be external to pytorch
-with no labeling responsibility, so we don't want to bother them.
-This script is based on: https://github.com/pytorch/vision/blob/main/.github/process_commit.py
-"""
-
-import sys
-from typing import Any, Set, Tuple, List
-import re
-import os
-import json
-import requests
-
-# For a PR to be properly labeled it should have release notes label and one topic label
-PULL_REQUEST_EXP = "Pull Request resolved:.*pull/(.*)"
-PRIMARY_LABEL_FILTER = "release notes:"
-SECONDARY_LABELS = {
-    "topic: bc_breaking",
-    "topic: deprecation",
-    "topic: new feature",
-    "topic: improvements",
-    "topic: bug fixes",
-    "topic: performance",
-    "topic: documentation",
-    "topic: developer feature",
-    "topic: not user facing",
-}
-# This secondary does not require a primary
-ALLOWED_ONLY_SECONDARY = {"topic: not user facing"}
-PYTORCH_REPO = "https://api.github.com/repos/pytorch/pytorch"
-GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
-REQUEST_HEADERS = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {GITHUB_TOKEN}'}
-
-
-def query_pytorch(cmd: str) -> Any:
-    response = requests.get(f"{PYTORCH_REPO}/{cmd}", headers=REQUEST_HEADERS)
-    return response.json()
-
-
-def get_pr_number(commit_hash: str) -> Any:
-    data = query_pytorch(f"commits/{commit_hash}")
-    if not data or (not data["commit"]["message"]):
-        return None
-    message = data["commit"]["message"]
-    p = re.compile(PULL_REQUEST_EXP)
-    result = p.search(message)
-    if not result:
-        return None
-    return result.group(1)
-
-
-def get_pr_author_and_labels(pr_number: int) -> Tuple[str, Set[str]]:
-    # See https://docs.github.com/en/rest/reference/pulls#get-a-pull-request
-    data = query_pytorch(f"pulls/{pr_number}")
-    user = data["user"]["login"]
-    labels = {label["name"] for label in data["labels"]}
-    return user, labels
-
-def get_repo_labels() -> List[str]:
-    collected_labels: List[str] = list()
-    for page in range(0, 10):
-        response = query_pytorch(f"labels?per_page=100&page={page}")
-        page_labels = list(map(lambda x: str(x["name"]), response))
-        if not page_labels:
-            break
-            collected_labels += page_labels
-    return collected_labels
-
-def post_pytorch_comment(pr_number: int, merger: str) -> Any:
-    message = {'body' : f"Hey {merger}." + """
-You've committed this PR, but it does not have both a 'release notes: ...' and 'topics: ...' label. \
-Please add one of each to the PR. The 'release notes: ...' label should represent the part of \
-PyTorch that this PR changes (fx, autograd, distributed, etc) and the 'topics: ...' label should \
-represent the kind of PR it is (not user facing, new feature, bug fix, perf improvement, etc). \
-The list of valid labels can be found [here](https://github.com/pytorch/pytorch/labels?q=release+notes) \
-for the 'release notes: ...' and [here](https://github.com/pytorch/pytorch/labels?q=topic) for the \
-'topics: ...'.
-For changes that are 'topic: not user facing' there is no need for a release notes label."""}
-
-    response = requests.post(
-        f"{PYTORCH_REPO}/issues/{pr_number}/comments",
-        json.dumps(message),
-        headers=REQUEST_HEADERS)
-    return response.json()
-
-if __name__ == "__main__":
-    commit_hash = sys.argv[1]
-    pr_number = get_pr_number(commit_hash)
-
-    if not pr_number:
-        sys.exit(0)
-
-    user, labels = get_pr_author_and_labels(pr_number)
-    repo_labels = get_repo_labels()
-
-    primary_labels = set(filter(lambda x: x.startswith(PRIMARY_LABEL_FILTER), repo_labels))
-    has_both_labels = bool(primary_labels.intersection(labels) and SECONDARY_LABELS.intersection(labels))
-    is_properly_labeled = has_both_labels or bool(ALLOWED_ONLY_SECONDARY.intersection(labels))
-
-    if not is_properly_labeled:
-        post_pytorch_comment(pr_number, user)
--- a/.github/scripts/run_torchbench.py
+++ b/.github/scripts/run_torchbench.py
@ -20,6 +20,8 @@ import subprocess

 from typing import List

+CUDA_VERSION = "cu102"
+PYTHON_VERSION = "3.7"
 TORCHBENCH_CONFIG_NAME = "config.yaml"
 MAGIC_PREFIX = "RUN_TORCHBENCH:"
 MAGIC_TORCHBENCH_PREFIX = "TORCHBENCH_BRANCH:"
@ -43,17 +45,6 @@ def gen_abtest_config(control: str, treatment: str, models: List[str]) -> str:
    config = config + "\n"
    return config

-def setup_gha_env(name: str, val: str) -> None:
-    fname = os.environ["GITHUB_ENV"]
-    content = f"{name}={val}\n"
-    with open(fname, "a") as fo:
-        fo.write(content)
-
-def find_current_branch(repo_path: str) -> str:
-    repo = git.Repo(repo_path)
-    name: str = repo.active_branch.name
-    return name
-
 def deploy_torchbench_config(output_dir: str, config: str) -> None:
    # Create test dir if needed
    pathlib.Path(output_dir).mkdir(exist_ok=True)
@ -82,18 +73,25 @@ def extract_models_from_pr(torchbench_path: str, prbody_file: str) -> List[str]:
            return []
    return model_list

-def find_torchbench_branch(prbody_file: str) -> str:
-    branch_name: str = ""
+def identify_torchbench_branch(torchbench_path: str, prbody_file: str) -> None:
+    branch_name: str
    with open(prbody_file, "r") as pf:
        lines = map(lambda x: x.strip(), pf.read().splitlines())
        magic_lines = list(filter(lambda x: x.startswith(MAGIC_TORCHBENCH_PREFIX), lines))
        if magic_lines:
            # Only the first magic line will be recognized.
            branch_name = magic_lines[0][len(MAGIC_TORCHBENCH_PREFIX):].strip()
-    # If not specified, use main as the default branch
+    # If not specified, directly return without the branch checkout
    if not branch_name:
-        branch_name = "main"
-    return branch_name
+        return
+    try:
+        print(f"Checking out the TorchBench branch: {branch_name} ...")
+        repo = git.Repo(torchbench_path)
+        origin = repo.remotes.origin
+        origin.fetch(branch_name)
+        repo.create_head(branch_name, origin.refs[branch_name]).checkout()
+    except git.exc.GitCommandError:
+        raise RuntimeError(f'{branch_name} doesn\'t exist in the pytorch/benchmark repository. Please double check.')

 def run_torchbench(pytorch_path: str, torchbench_path: str, output_dir: str) -> None:
    # Copy system environment so that we will not override
@ -106,41 +104,28 @@ def run_torchbench(pytorch_path: str, torchbench_path: str, output_dir: str) ->

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run TorchBench tests based on PR')
+    parser.add_argument('--pr-num', required=True, type=str, help="The Pull Request number")
+    parser.add_argument('--pr-base-sha', required=True, type=str, help="The Pull Request base hash")
+    parser.add_argument('--pr-head-sha', required=True, type=str, help="The Pull Request head hash")
    parser.add_argument('--pr-body', required=True, help="The file that contains body of a Pull Request")
-
-    subparsers = parser.add_subparsers(dest='command')
-    # parser for setup the torchbench branch name env
-    branch_parser = subparsers.add_parser("set-torchbench-branch")
-    # parser to run the torchbench branch
-    run_parser = subparsers.add_parser("run")
-    run_parser.add_argument('--pr-num', required=True, type=str, help="The Pull Request number")
-    run_parser.add_argument('--pr-base-sha', required=True, type=str, help="The Pull Request base hash")
-    run_parser.add_argument('--pr-head-sha', required=True, type=str, help="The Pull Request head hash")
-    run_parser.add_argument('--pytorch-path', required=True, type=str, help="Path to pytorch repository")
-    run_parser.add_argument('--torchbench-path', required=True, type=str, help="Path to TorchBench repository")
+    parser.add_argument('--pytorch-path', required=True, type=str, help="Path to pytorch repository")
+    parser.add_argument('--torchbench-path', required=True, type=str, help="Path to TorchBench repository")
    args = parser.parse_args()

-    if args.command == 'set-torchbench-branch':
-        branch_name = find_torchbench_branch(args.pr_body)
-        # env name: "TORCHBENCH_BRANCH"
-        setup_gha_env(MAGIC_TORCHBENCH_PREFIX[:-1], branch_name)
-    elif args.command == 'run':
-        output_dir: str = os.path.join(os.environ["HOME"], ".torchbench", "bisection", f"pr{args.pr_num}")
-        # Identify the specified models and verify the input
-        models = extract_models_from_pr(args.torchbench_path, args.pr_body)
-        if not models:
-            print("Can't parse the model filter from the pr body. Currently we only support allow-list.")
-            exit(-1)
-        # Assert the current branch in args.torchbench_path is the same as the one specified in pr body
-        branch_name = find_torchbench_branch(args.pr_body)
-        current_branch = find_current_branch(args.torchbench_path)
-        assert branch_name == current_branch, f"Torchbench repo {args.torchbench_path} is on branch {current_branch}, \
-                                                but user specified to run on branch {branch_name}."
-        print(f"Ready to run TorchBench with benchmark. Result will be saved in the directory: {output_dir}.")
-        # Run TorchBench with the generated config
-        torchbench_config = gen_abtest_config(args.pr_base_sha, args.pr_head_sha, models)
-        deploy_torchbench_config(output_dir, torchbench_config)
-        run_torchbench(pytorch_path=args.pytorch_path, torchbench_path=args.torchbench_path, output_dir=output_dir)
-    else:
-        print(f"The command {args.command} is not supported.")
-        exit(-1)
+    output_dir: str = os.path.join(os.environ["HOME"], ".torchbench", "bisection", f"pr{args.pr_num}")
+    # Identify the specified models and verify the input
+    models = extract_models_from_pr(args.torchbench_path, args.pr_body)
+    if not models:
+        print("Can't parse the model filter from the pr body. Currently we only support allow-list.")
+        exit(1)
+    # Identify the specified TorchBench branch, verify the branch exists, and checkout the branch
+    try:
+        identify_torchbench_branch(args.torchbench_path, args.pr_body)
+    except RuntimeError as e:
+        print(f"Identify TorchBench branch failed: {str(e)}")
+        exit(1)
+    print(f"Ready to run TorchBench with benchmark. Result will be saved in the directory: {output_dir}.")
+    # Run TorchBench with the generated config
+    torchbench_config = gen_abtest_config(args.pr_base_sha, args.pr_head_sha, models)
+    deploy_torchbench_config(output_dir, torchbench_config)
+    run_torchbench(pytorch_path=args.pytorch_path, torchbench_path=args.torchbench_path, output_dir=output_dir)
--- a/.github/scripts/syncbranches.py
+++ b/.github/scripts/syncbranches.py
@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-from gitutils import get_git_repo_dir, GitRepo
-from typing import Any
-
-
-def parse_args() -> Any:
-    from argparse import ArgumentParser
-    parser = ArgumentParser("Merge PR/branch into default branch")
-    parser.add_argument("--sync-branch", default="sync")
-    parser.add_argument("--default-branch", type=str, default="main")
-    parser.add_argument("--dry-run", action="store_true")
-    parser.add_argument("--debug", action="store_true")
-    return parser.parse_args()
-
-
-def main() -> None:
-    args = parse_args()
-    repo = GitRepo(get_git_repo_dir(), debug=args.debug)
-    repo.cherry_pick_commits(args.sync_branch, args.default_branch)
-    repo.push(args.default_branch, args.dry_run)
-
-
-if __name__ == '__main__':
-    main()
--- a/.github/scripts/test_gitutils.py
+++ b/.github/scripts/test_gitutils.py
@ -1,27 +0,0 @@
-#!/usr/bin/env python3
-from gitutils import PeekableIterator
-from unittest import TestCase, main
-
-class TestPeekableIterator(TestCase):
-    def test_iterator(self, input_: str = "abcdef") -> None:
-        iter_ = PeekableIterator(input_)
-        for idx, c in enumerate(iter_):
-            self.assertEqual(c, input_[idx])
-
-    def test_is_iterable(self) -> None:
-        from collections.abc import Iterator
-        iter_ = PeekableIterator("")
-        self.assertTrue(isinstance(iter_, Iterator))
-
-    def test_peek(self, input_: str = "abcdef") -> None:
-        iter_ = PeekableIterator(input_)
-        for idx, c in enumerate(iter_):
-            if idx + 1 < len(input_):
-                self.assertEqual(iter_.peek(), input_[idx + 1])
-            else:
-                self.assertTrue(iter_.peek() is None)
-
-
-
-if __name__ == '__main__':
-    main()
--- a/.github/scripts/trymerge.py
+++ b/.github/scripts/trymerge.py
@ -1,440 +0,0 @@
-#!/usr/bin/env python3
-
-import json
-import os
-import re
-from dataclasses import dataclass
-from urllib.request import urlopen, Request
-from urllib.error import HTTPError
-from typing import cast, Any, Callable, Dict, List, Optional, Tuple, Union
-from gitutils import get_git_remote_name, get_git_repo_dir, patterns_to_regex, GitRepo
-
-
-GH_GET_PR_INFO_QUERY = """
-query ($owner: String!, $name: String!, $number: Int!) {
-  repository(owner: $owner, name: $name) {
-    pullRequest(number: $number) {
-      closed
-      isCrossRepository
-      author {
-        login
-      }
-      title
-      body
-      headRefName
-      headRepository {
-        nameWithOwner
-      }
-      baseRefName
-      baseRepository {
-        nameWithOwner
-        isPrivate
-        defaultBranchRef {
-          name
-        }
-      }
-      mergeCommit {
-        oid
-      }
-      commits(first: 100) {
-        nodes {
-          commit {
-            author {
-              user {
-                login
-              }
-              email
-              name
-            }
-            oid
-            checkSuites(filterBy: {appId: 12274}, first: 1) {
-              nodes {
-                app {
-                  databaseId
-                }
-                conclusion
-              }
-            }
-          }
-        }
-        totalCount
-      }
-      changedFiles
-      files(last: 100) {
-        nodes {
-          path
-        }
-      }
-      latestReviews(last: 100) {
-        nodes {
-          author {
-            login
-          }
-          state
-        }
-        totalCount
-      }
-      comments(last: 1) {
-        nodes {
-          bodyText
-          author {
-            login
-          }
-          authorAssociation
-          editor {
-            login
-          }
-        }
-      }
-    }
-  }
-}
-"""
-
-RE_GHSTACK_HEAD_REF = re.compile(r"^(gh/[^/]+/[0-9]+/)head$")
-RE_GHSTACK_SOURCE_ID = re.compile(r'^ghstack-source-id: (.+)\n?', re.MULTILINE)
-RE_PULL_REQUEST_RESOLVED = re.compile(
-    r'Pull Request resolved: '
-    r'https://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>[0-9]+)',
-    re.MULTILINE
-)
-RE_REVERT_CMD = re.compile(r"@pytorch(merge|)bot\s+revert\s+this")
-RE_DIFF_REV = re.compile(r'^Differential Revision:.+?(D[0-9]+)', re.MULTILINE)
-
-
-def _fetch_url(url: str, *,
-               headers: Optional[Dict[str, str]] = None,
-               data: Optional[Dict[str, Any]] = None,
-               method: Optional[str] = None,
-               reader: Callable[[Any], Any] = lambda x: x.read()) -> Any:
-    if headers is None:
-        headers = {}
-    token = os.environ.get("GITHUB_TOKEN")
-    if token is not None and url.startswith('https://api.github.com/'):
-        headers['Authorization'] = f'token {token}'
-    data_ = json.dumps(data).encode() if data is not None else None
-    try:
-        with urlopen(Request(url, headers=headers, data=data_, method=method)) as conn:
-            return reader(conn)
-    except HTTPError as err:
-        if err.code == 403 and all(key in err.headers for key in ['X-RateLimit-Limit', 'X-RateLimit-Used']):
-            print(f"Rate limit exceeded: {err.headers['X-RateLimit-Used']}/{err.headers['X-RateLimit-Limit']}")
-        raise
-
-
-def fetch_json(url: str,
-               params: Optional[Dict[str, Any]] = None,
-               data: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
-    headers = {'Accept': 'application/vnd.github.v3+json'}
-    if params is not None and len(params) > 0:
-        url += '?' + '&'.join(f"{name}={val}" for name, val in params.items())
-    return cast(List[Dict[str, Any]], _fetch_url(url, headers=headers, data=data, reader=json.load))
-
-
-def gh_post_comment(org: str, project: str, pr_num: int, comment: str, dry_run: bool = False) -> List[Dict[str, Any]]:
-    if dry_run:
-        print(comment)
-        return []
-    return fetch_json(f'https://api.github.com/repos/{org}/{project}/issues/{pr_num}/comments',
-                      data={"body": comment})
-
-
-def gh_add_labels(org: str, project: str, pr_num: int, labels: Union[str, List[str]]) -> None:
-    fetch_json(f'https://api.github.com/repos/{org}/{project}/issues/{pr_num}/labels',
-               data={"labels": labels})
-
-
-def gh_graphql(query: str, **kwargs: Any) -> Dict[str, Any]:
-    rc = _fetch_url("https://api.github.com/graphql", data={"query": query, "variables": kwargs}, reader=json.load)
-    if "errors" in rc:
-        raise RuntimeError(f"GraphQL query {query} failed: {rc['errors']}")
-    return cast(Dict[str, Any], rc)
-
-
-def gh_get_pr_info(org: str, proj: str, pr_no: int) -> Any:
-    rc = gh_graphql(GH_GET_PR_INFO_QUERY, name=proj, owner=org, number=pr_no)
-    return rc["data"]["repository"]["pullRequest"]
-
-
-def parse_args() -> Any:
-    from argparse import ArgumentParser
-    parser = ArgumentParser("Merge PR into default branch")
-    parser.add_argument("--dry-run", action="store_true")
-    parser.add_argument("--revert", action="store_true")
-    parser.add_argument("pr_num", type=int)
-    return parser.parse_args()
-
-
-class GitHubPR:
-    def __init__(self, org: str, project: str, pr_num: int) -> None:
-        assert isinstance(pr_num, int)
-        self.org = org
-        self.project = project
-        self.pr_num = pr_num
-        self.info = gh_get_pr_info(org, project, pr_num)
-
-    def is_closed(self) -> bool:
-        return bool(self.info["closed"])
-
-    def is_cross_repo(self) -> bool:
-        return bool(self.info["isCrossRepository"])
-
-    def base_ref(self) -> str:
-        return cast(str, self.info["baseRefName"])
-
-    def default_branch(self) -> str:
-        return cast(str, self.info["baseRepository"]["defaultBranchRef"]["name"])
-
-    def head_ref(self) -> str:
-        return cast(str, self.info["headRefName"])
-
-    def is_ghstack_pr(self) -> bool:
-        return RE_GHSTACK_HEAD_REF.match(self.head_ref()) is not None
-
-    def is_base_repo_private(self) -> bool:
-        return bool(self.info["baseRepository"]["isPrivate"])
-
-    def get_changed_files_count(self) -> int:
-        return int(self.info["changedFiles"])
-
-    def get_changed_files(self) -> List[str]:
-        rc = [x["path"] for x in self.info["files"]["nodes"]]
-        if len(rc) != self.get_changed_files_count():
-            raise RuntimeError("Changed file count mismatch")
-        return rc
-
-    def _get_reviewers(self) -> List[Tuple[str, str]]:
-        reviews_count = int(self.info["latestReviews"]["totalCount"])
-        if len(self.info["latestReviews"]["nodes"]) != reviews_count:
-            raise RuntimeError("Can't fetch all PR reviews")
-        return [(x["author"]["login"], x["state"]) for x in self.info["latestReviews"]["nodes"]]
-
-    def get_approved_by(self) -> List[str]:
-        return [login for (login, state) in self._get_reviewers() if state == "APPROVED"]
-
-    def get_commit_count(self) -> int:
-        return int(self.info["commits"]["totalCount"])
-
-    def get_pr_creator_login(self) -> str:
-        return cast(str, self.info["author"]["login"])
-
-    def get_committer_login(self, num: int = 0) -> str:
-        return cast(str, self.info["commits"]["nodes"][num]["commit"]["author"]["user"]["login"])
-
-    def get_committer_author(self, num: int = 0) -> str:
-        node = self.info["commits"]["nodes"][num]["commit"]["author"]
-        return f"{node['name']} <{node['email']}>"
-
-    def get_check_suite_conclusions(self) -> Dict[int, str]:
-        last_commit = self.info["commits"]["nodes"][-1]["commit"]
-        rc = {}
-        for node in last_commit["checkSuites"]["nodes"]:
-            rc[int(node["app"]["databaseId"])] = node["conclusion"]
-        return rc
-
-    def get_authors(self) -> Dict[str, str]:
-        rc = {}
-        for idx in range(self.get_commit_count()):
-            rc[self.get_committer_login(idx)] = self.get_committer_author(idx)
-
-        return rc
-
-    def get_author(self) -> str:
-        authors = self.get_authors()
-        if len(authors) == 1:
-            return next(iter(authors.values()))
-        return self.get_authors()[self.get_pr_creator_login()]
-
-    def get_title(self) -> str:
-        return cast(str, self.info["title"])
-
-    def get_body(self) -> str:
-        return cast(str, self.info["body"])
-
-    def get_merge_commit(self) -> Optional[str]:
-        mc = self.info["mergeCommit"]
-        return mc["oid"] if mc is not None else None
-
-    def get_pr_url(self) -> str:
-        return f"https://github.com/{self.org}/{self.project}/pull/{self.pr_num}"
-
-    def get_comment_body(self, num: int = -1) -> str:
-        return cast(str, self.info["comments"]["nodes"][num]["bodyText"])
-
-    def get_comment_author_login(self, num: int = -1) -> str:
-        return cast(str, self.info["comments"]["nodes"][num]["author"]["login"])
-
-    def get_comment_editor_login(self, num: int = -1) -> Optional[str]:
-        rc = self.info["comments"]["nodes"][num]["editor"]
-        return rc["login"] if rc is not None else None
-
-    def get_comment_author_association(self, num: int = -1) -> str:
-        return cast(str, self.info["comments"]["nodes"][num]["authorAssociation"])
-
-    def merge_ghstack_into(self, repo: GitRepo) -> None:
-        assert self.is_ghstack_pr()
-        # For ghstack, cherry-pick commits based from origin
-        orig_ref = f"{repo.remote}/{re.sub(r'/head$', '/orig', self.head_ref())}"
-        rev_list = repo.revlist(f"{self.default_branch()}..{orig_ref}")
-        for idx, rev in enumerate(reversed(rev_list)):
-            msg = repo.commit_message(rev)
-            m = RE_PULL_REQUEST_RESOLVED.search(msg)
-            if m is None:
-                raise RuntimeError(f"Could not find PR-resolved string in {msg} of ghstacked PR {self.pr_num}")
-            if self.org != m.group('owner') or self.project != m.group('repo'):
-                raise RuntimeError(f"PR {m.group('number')} resolved to wrong owner/repo pair")
-            pr_num = int(m.group('number'))
-            if pr_num != self.pr_num:
-                pr = GitHubPR(self.org, self.project, pr_num)
-                if pr.is_closed():
-                    print(f"Skipping {idx+1} of {len(rev_list)} PR (#{pr_num}) as its already been merged")
-                    continue
-                # Raises exception if matching rule is not found
-                find_matching_merge_rule(pr, repo)
-
-            repo.cherry_pick(rev)
-            repo.amend_commit_message(re.sub(RE_GHSTACK_SOURCE_ID, "", msg))
-
-    def merge_into(self, repo: GitRepo, dry_run: bool = False) -> None:
-        # Raises exception if matching rule is not found
-        find_matching_merge_rule(self, repo)
-        if repo.current_branch() != self.default_branch():
-            repo.checkout(self.default_branch())
-        if not self.is_ghstack_pr():
-            msg = self.get_title() + "\n\n" + self.get_body()
-            msg += f"\nPull Request resolved: {self.get_pr_url()}\n"
-            repo._run_git("merge", "--squash", f"{repo.remote}/{self.head_ref()}")
-            repo._run_git("commit", f"--author=\"{self.get_author()}\"", "-m", msg)
-        else:
-            self.merge_ghstack_into(repo)
-
-        repo.push(self.default_branch(), dry_run)
-
-
-@dataclass
-class MergeRule:
-    name: str
-    patterns: List[str]
-    approved_by: List[str]
-    mandatory_app_id: Optional[int]
-
-
-def read_merge_rules(repo: GitRepo) -> List[MergeRule]:
-    from pathlib import Path
-    rules_path = Path(repo.repo_dir) / ".github" / "merge_rules.json"
-    if not rules_path.exists():
-        print(f"{rules_path} does not exist, returning empty rules")
-        return []
-    with open(rules_path) as fp:
-        rc = json.load(fp, object_hook=lambda x: MergeRule(**x))
-    return cast(List[MergeRule], rc)
-
-
-
-def find_matching_merge_rule(pr: GitHubPR, repo: GitRepo) -> MergeRule:
-    """Returns merge rule matching to this pr or raises an exception"""
-    changed_files = pr.get_changed_files()
-    approved_by = set(pr.get_approved_by())
-    rules = read_merge_rules(repo)
-    for rule in rules:
-        rule_name = rule.name
-        rule_approvers_set = set(rule.approved_by)
-        patterns_re = patterns_to_regex(rule.patterns)
-        approvers_intersection = approved_by.intersection(rule_approvers_set)
-        # If rule requires approvers but they aren't the ones that reviewed PR
-        if len(approvers_intersection) == 0 and len(rule_approvers_set) > 0:
-            print(f"Skipping rule {rule_name} due to no approvers overlap")
-            continue
-        if rule.mandatory_app_id is not None:
-            cs_conslusions = pr.get_check_suite_conclusions()
-            mandatory_app_id = rule.mandatory_app_id
-            if mandatory_app_id not in cs_conslusions or cs_conslusions[mandatory_app_id] != "SUCCESS":
-                print(f"Skipping rule {rule_name} as mandatory app {mandatory_app_id} is not in {cs_conslusions}")
-                continue
-        non_matching_files = []
-        for fname in changed_files:
-            if not patterns_re.match(fname):
-                non_matching_files.append(fname)
-        if len(non_matching_files) > 0:
-            print(f"Skipping rule {rule_name} due to non-matching files: {non_matching_files}")
-            continue
-        print(f"Matched rule {rule_name} for {pr.pr_num}")
-        return rule
-    raise RuntimeError(f"PR {pr.pr_num} does not match merge rules")
-
-
-def try_revert(repo: GitRepo, pr: GitHubPR, dry_run: bool = False) -> None:
-    def post_comment(msg: str) -> None:
-        gh_post_comment(pr.org, pr.project, pr.pr_num, msg, dry_run=dry_run)
-    if not pr.is_closed():
-        return post_comment(f"Can't revert open PR #{pr.pr_num}")
-    if not RE_REVERT_CMD.match(pr.get_comment_body()):
-        raise RuntimeError(f"Comment {pr.get_comment_body()} does not seem to be a valid revert command")
-    if pr.get_comment_editor_login() is not None:
-        return post_comment("Don't want to revert based on edited command")
-    author_association = pr.get_comment_author_association()
-    author_login = pr.get_comment_author_login()
-    # For some reason, one can not be a member of private repo, only CONTRIBUTOR
-    expected_association = "CONTRIBUTOR" if pr.is_base_repo_private() else "MEMBER"
-    if author_association != expected_association and author_association != "OWNER":
-        return post_comment(f"Will not revert as @{author_login} is not a {expected_association}, but {author_association}")
-
-    # Raises exception if matching rule is not found
-    find_matching_merge_rule(pr, repo)
-    commit_sha = pr.get_merge_commit()
-    if commit_sha is None:
-        commits = repo.commits_resolving_gh_pr(pr.pr_num)
-        if len(commits) == 0:
-            raise RuntimeError("Can't find any commits resolving PR")
-        commit_sha = commits[0]
-    msg = repo.commit_message(commit_sha)
-    rc = RE_DIFF_REV.search(msg)
-    if rc is not None:
-        raise RuntimeError(f"Can't revert PR that was landed via phabricator as {rc.group(1)}")
-    repo.checkout(pr.default_branch())
-    repo.revert(commit_sha)
-    msg = repo.commit_message("HEAD")
-    msg = re.sub(RE_PULL_REQUEST_RESOLVED, "", msg)
-    msg += f"\nReverted {pr.get_pr_url()} on behalf of @{author_login}\n"
-    repo.amend_commit_message(msg)
-    repo.push(pr.default_branch(), dry_run)
-    if not dry_run:
-        gh_add_labels(pr.org, pr.project, pr.pr_num, ["reverted"])
-
-def main() -> None:
-    args = parse_args()
-    repo = GitRepo(get_git_repo_dir(), get_git_remote_name())
-    org, project = repo.gh_owner_and_name()
-
-    pr = GitHubPR(org, project, args.pr_num)
-    if args.revert:
-        try:
-            try_revert(repo, pr, dry_run=args.dry_run)
-        except Exception as e:
-            msg = f"Reverting PR {args.pr_num} failed due to {e}"
-            run_url = os.getenv("GH_RUN_URL")
-            if run_url is not None:
-                msg += f"\nRaised by {run_url}"
-            gh_post_comment(org, project, args.pr_num, msg, dry_run=args.dry_run)
-        return
-
-    if pr.is_closed():
-        gh_post_comment(org, project, args.pr_num, f"Can't merge closed PR #{args.pr_num}", dry_run=args.dry_run)
-        return
-
-    if pr.is_cross_repo():
-        gh_post_comment(org, project, args.pr_num, "Cross-repo merges are not supported at the moment", dry_run=args.dry_run)
-        return
-
-    try:
-        pr.merge_into(repo, dry_run=args.dry_run)
-    except Exception as e:
-        msg = f"Merge failed due to {e}"
-        run_url = os.getenv("GH_RUN_URL")
-        if run_url is not None:
-            msg += f"\nRaised by {run_url}"
-        gh_post_comment(org, project, args.pr_num, msg, dry_run=args.dry_run)
-
-
-if __name__ == "__main__":
-    main()
--- a/.github/templates/android_ci_full_workflow.yml.j2
+++ b/.github/templates/android_ci_full_workflow.yml.j2
@ -1,165 +0,0 @@
-{%- extends "linux_ci_workflow.yml.j2" -%}
-{% import 'common_android.yml.j2' as common_android %}
-{%- set exclude_test = true -%}
-{% block name -%}
-# Template is at:    .github/templates/android_ci_full_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-{%- if is_default %}
-  pull_request:
-{%- endif -%}
-{%- for label in ciflow_config.labels | sort %}
-  {%- if loop.first %}
-  push:
-    tags:
-  {%- endif %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-
-{% block build +%}
-  # building and testing in a single job since bazel runs only small subset of tests
-  build-and-test:
-    runs-on: !{{ test_runner_type }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build-and-test
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout() }}
-      !{{ common.calculate_docker_image(false) }}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Determine shm-size
-        run: |
-          shm_size="1g"
-          case "${BUILD_ENVIRONMENT}" in
-            *cuda*)
-              shm_size="2g"
-              ;;
-            *rocm*)
-              shm_size="8g"
-              ;;
-          esac
-          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
-      - name: Output disk space left
-        run: |
-          sudo df -H
-      - name: Preserve github env variables for use in docker
-        run: |
-          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
-      !{{ common.parse_ref() }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a-build", "arm-v7a") }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a-build", "arm-v8a") }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32-build", "x86_32") }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64-build", "x86_64") }}
-      - name: Build-Final-Artifcact
-        env:
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        run: |
-          set -eux
-
-          docker_image_libtorch_android_x86_32="${DOCKER_IMAGE}-x86_32"
-          docker_image_libtorch_android_x86_64="${DOCKER_IMAGE}-x86_64"
-          docker_image_libtorch_android_arm_v7a="${DOCKER_IMAGE}-arm-v7a"
-          docker_image_libtorch_android_arm_v8a="${DOCKER_IMAGE}-arm-v8a"
-
-          echo "docker_image_commit: ${DOCKER_IMAGE}"
-          echo "docker_image_libtorch_android_x86_32: ${docker_image_libtorch_android_x86_32}"
-          echo "docker_image_libtorch_android_x86_64: ${docker_image_libtorch_android_x86_64}"
-          echo "docker_image_libtorch_android_arm_v7a: ${docker_image_libtorch_android_arm_v7a}"
-          echo "docker_image_libtorch_android_arm_v8a: ${docker_image_libtorch_android_arm_v8a}"
-
-          # x86_32
-          time docker pull "${docker_image_libtorch_android_x86_32}" >/dev/null
-          export id_x86_32
-          id_x86_32=$(docker run -e GRADLE_OFFLINE=1 --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_x86_32}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "${id_x86_32}" bash) 2>&1
-
-          # arm-v7a
-          time docker pull "${docker_image_libtorch_android_arm_v7a}" >/dev/null
-          export id_arm_v7a
-          id_arm_v7a=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_arm_v7a}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "${id_arm_v7a}" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_install_arm_v7a"
-          docker cp "${id_arm_v7a}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_arm_v7a"
-
-          # x86_64
-          time docker pull "${docker_image_libtorch_android_x86_64}" >/dev/null
-          export id_x86_64
-          id_x86_64=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_x86_64}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "${id_x86_64}" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_install_x86_64"
-          docker cp "${id_x86_64}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_x86_64"
-
-          # arm-v8a
-          time docker pull "${docker_image_libtorch_android_arm_v8a}" >/dev/null
-          export id_arm_v8a
-          id_arm_v8a=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_arm_v8a}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_arm_v8a" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_install_arm_v8a"
-          docker cp "${id_arm_v8a}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_arm_v8a"
-
-          # Putting everything together
-          docker cp "${GITHUB_WORKSPACE}/build_android_install_arm_v7a" "${id_x86_32}:/var/lib/jenkins/workspace/build_android_install_arm_v7a"
-          docker cp "${GITHUB_WORKSPACE}/build_android_install_x86_64" "${id_x86_32}:/var/lib/jenkins/workspace/build_android_install_x86_64"
-          docker cp "${GITHUB_WORKSPACE}/build_android_install_arm_v8a" "${id_x86_32}:/var/lib/jenkins/workspace/build_android_install_arm_v8a"
-
-          # run gradle buildRelease
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec \
-            -e BUILD_ENVIRONMENT="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build" \
-            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
-            -e IS_GHA \
-            -e PR_NUMBER \
-            -e SHA1 \
-            -e BRANCH \
-            -e GITHUB_RUN_ID \
-            -e SCCACHE_BUCKET \
-            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-            -e SKIP_SCCACHE_INITIALIZATION=1 \
-            -e TORCH_CUDA_ARCH_LIST \
-            -e PR_LABELS \
-            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --user jenkins \
-            -u jenkins -i "${id_x86_32}" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_artifacts"
-          docker cp "${id_x86_32}:/var/lib/jenkins/workspace/android/artifacts.tgz" "${GITHUB_WORKSPACE}/build_android_artifacts/"
-
-          output_image="${DOCKER_IMAGE}-android-x86_32-gradle"
-          docker commit "${id_x86_32}" "${output_image}"
-          time docker push "${output_image}"
-      !{{ common_android.upload_androind_binary_size("prebuilt", "${GITHUB_WORKSPACE}/build_android_artifacts/artifacts.tgz") }}
-      - uses: !{{ common.upload_artifact_s3_action }}
-        name: Store PyTorch Android Build Artifacts on S3
-        with:
-          name: ${{ env.BUILD_ENVIRONMENT }}
-          retention-days: 14
-          if-no-files-found: error
-          path:
-            build_android_artifacts/artifacts.tgz
-      !{{ common.teardown_ec2_linux() }}
-{%- endblock %}
--- a/.github/templates/android_ci_workflow.yml.j2
+++ b/.github/templates/android_ci_workflow.yml.j2
@ -1,111 +0,0 @@
-{%- extends "linux_ci_workflow.yml.j2" -%}
-{% import 'common_android.yml.j2' as common_android %}
-{%- set exclude_test = true -%}
-{% block name -%}
-# Template is at:    .github/templates/android_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-{%- if is_default %}
-  pull_request:
-{%- endif -%}
-{%- for label in ciflow_config.labels | sort %}
-  {%- if loop.first %}
-  push:
-    tags:
-  {%- endif %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-
-{% block build +%}
-  # building and testing in a single job since bazel runs only small subset of tests
-  build-and-test:
-    runs-on: !{{ test_runner_type }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build-and-test
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout() }}
-      !{{ common.calculate_docker_image(false) }}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Determine shm-size
-        run: |
-          shm_size="1g"
-          case "${BUILD_ENVIRONMENT}" in
-            *cuda*)
-              shm_size="2g"
-              ;;
-            *rocm*)
-              shm_size="8g"
-              ;;
-          esac
-          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
-      - name: Output disk space left
-        run: |
-          sudo df -H
-      - name: Preserve github env variables for use in docker
-        run: |
-          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
-      - name: Build
-        run: |
-          set -e
-          # Unlike other gradle jobs, it's not worth building libtorch in a separate CI job and share via docker, because:
-          # 1) Not shareable: it's custom selective build, which is different from default libtorch mobile build;
-          # 2) Not parallelizable by architecture: it only builds libtorch for one architecture;
-
-          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}"
-          time docker pull "${DOCKER_IMAGE}" >/dev/null
-
-          export BUILD_LITE_INTERPRETER
-          BUILD_LITE_INTERPRETER="1"
-          if [[ "${BUILD_ENVIRONMENT}" == *"full-jit" ]]; then
-            BUILD_LITE_INTERPRETER="0"
-          fi
-
-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
-          # shellcheck disable=SC2016
-          export id
-          id=$(docker run -e BUILD_ENVIRONMENT \
-            -e JOB_BASE_NAME \
-            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e SCCACHE_BUCKET \
-            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-            -e PR_LABELS \
-            -e SKIP_SCCACHE_INITIALIZATION=1 \
-            -e TORCH_CUDA_ARCH_LIST \
-            -e BUILD_LITE_INTERPRETER \
-            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --security-opt seccomp=unconfined \
-            --cap-add=SYS_PTRACE \
-            --tty \
-            --detach \
-            --user jenkins \
-            -v "$(pwd):/var/lib/jenkins/workspace" \
-            --cap-add=SYS_PTRACE \
-            --security-opt seccomp=unconfined \
-            --cap-add=SYS_PTRACE \
-            --security-opt seccomp=unconfined \
-            -t -d -w /var/lib/jenkins "${DOCKER_IMAGE}")
-
-          # shellcheck disable=SC2016
-          export COMMAND
-          # shellcheck disable=SC2016
-          COMMAND='((echo "export GRADLE_OFFLINE=1" && echo "export BUILD_LITE_INTERPRETER=${BUILD_LITE_INTERPRETER}" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
-          echo "${COMMAND}" > ./command.sh && bash ./command.sh
-          # Skip docker push as this job is purely for size analysis purpose.
-          # Result binaries are already in `/home/circleci/project/` as it's mounted instead of copied.
-      !{{ common.parse_ref() }}
-      !{{ common_android.upload_androind_binary_size("custom-build-single", "") }}
-      !{{ common.teardown_ec2_linux() }}
-{%- endblock %}
--- a/.github/templates/bazel_ci_workflow.yml.j2
+++ b/.github/templates/bazel_ci_workflow.yml.j2
@ -1,5 +1,4 @@
 {%- extends "linux_ci_workflow.yml.j2" -%}
-{% import 'common_android.yml.j2' as common_android %}
 {%- set exclude_test = true -%}
 {% block name -%}
 # Template is at:    .github/templates/bazel_ci_workflow.yml.j2
@ -8,36 +7,35 @@ name: !{{ build_environment }}
 {%- endblock %}

 on:
-{%- if is_default %}
+{%- if on_pull_request %}
  pull_request:
-{%- endif -%}
-{%- for label in ciflow_config.labels | sort %}
-  {%- if loop.first %}
-  push:
-    tags:
+  {%- if ciflow_config.enabled %}
+    {%- if ciflow_config.trigger_action_only %}
+    types: [!{{ ciflow_config.trigger_action }}]
+    {%- else %}
+    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
+    {%- endif %}
  {%- endif %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
+{%- else %}
+  # TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
+{%- endif %}

 {% block build +%}
  # building and testing in a single job since bazel runs only small subset of tests
  build-and-test:
    runs-on: !{{ test_runner_type }}
+    needs: [calculate-docker-image, !{{ ciflow_config.root_job_name }}]
    env:
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
      JOB_BASE_NAME: !{{ build_environment }}-build-and-test
      NUM_TEST_SHARDS: !{{ num_test_shards }}
+      CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout() }}
-      !{{ common.calculate_docker_image(false) }}
-      - name: Pull Docker image
+      !{{ common.checkout_pytorch("recursive") }}
+      - name: Pull docker image
        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
+          docker pull "${DOCKER_IMAGE}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
@ -81,10 +79,23 @@ on:
          )
          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/build.sh'
      !{{ common.parse_ref() }}
-      !{{ common_android.upload_androind_binary_size("", "")}}
+      - name: Display and upload binary build size statistics (Click Me)
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
+        run: |
+          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
+          export COMMIT_TIME
+          pip3 install requests==2.26
+          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
      - name: Test
-        # Time out the test phase after 3.5 hours
-        timeout-minutes: 210
        run: |
          # detached container should get cleaned up by teardown_ec2_linux
          export SHARD_NUMBER=0
@ -97,10 +108,10 @@ on:
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e SHARD_NUMBER \
-            -e NUM_TEST_SHARDS \
            -e JOB_BASE_NAME \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
+            -e CONTINUE_THROUGH_ERROR \
            -e PR_LABELS \
            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
@ -121,7 +132,6 @@ on:
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      !{{ common.upload_test_reports(name='bazel') }}
-      !{{ common.upload_downloaded_files(name='bazel') }}
      !{{ common.upload_test_statistics(build_environment) }}
      !{{ common.teardown_ec2_linux() }}
 {%- endblock %}
--- a/.github/templates/common.yml.j2
+++ b/.github/templates/common.yml.j2
@ -4,7 +4,6 @@
 {%- set squid_proxy    = "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -%}
 {# squid_no_proxy is a list of common set of fixed domains or IPs that we don't need to proxy. See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/http_proxy_config.html#windows-proxy #}
 {%- set squid_no_proxy = "localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" -%}
-{%- set timeout_minutes = 240 -%}

 {%- macro concurrency(build_environment) -%}
 concurrency:
@ -12,12 +11,6 @@ concurrency:
  cancel-in-progress: true
 {%- endmacro -%}

-{%- macro add_retry_to_env() -%}
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-{%- endmacro -%}
-
 {%- macro display_ec2_information() -%}
      - name: Display EC2 information
        shell: bash
@ -34,54 +27,32 @@ concurrency:
          echo "instance-type: $(get_ec2_metadata instance-type)"
 {%- endmacro -%}

-{%- macro parse_ref(pytorch_directory="") -%}
+{%- macro parse_ref() -%}
      - name: Parse ref
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
        id: parse-ref
        run: .github/scripts/parse_ref.py
 {%- endmacro -%}

-{%- macro upload_test_statistics(build_environment, when="always()", pytorch_directory="") -%}
+{%- macro upload_test_statistics(build_environment) -%}
      - name: Display and upload test statistics (Click Me)
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
-        if: !{{ when }}
+        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: !{{ build_environment }}-test
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: '${{ github.run_id }}'
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
-          python3 -m pip install boto3==1.19.12
+          python3 -m pip install boto3==1.16.34
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
 {%- endmacro -%}

-{%- macro chown_dir(dir) -%}
-      - name: Chown artifacts
-        if: always()
-        run: |
-          # Ensure the working directory gets chowned back to the current user
-          docker run --rm -v "!{{ dir }}:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-{%- endmacro -%}
-
-{%- macro setup_ec2_windows() -%}
-      !{{ display_ec2_information() }}
-      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
-        uses: seemethere/add-github-ssh-key@v1
-        with:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-{%- endmacro -%}
-
 {%- macro setup_ec2_linux() -%}
      !{{ display_ec2_information() }}
      - name: Log in to ECR
@ -89,20 +60,23 @@ concurrency:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
-          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-          !{{ add_retry_to_env() }}
-          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
-              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
+          bash /tmp/ecr-login.sh
+          rm /tmp/ecr-login.sh
      - name: Chown workspace
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
        run: |
-          !{{ add_retry_to_env() }}
-          retry docker pull "${ALPINE_IMAGE}"
+          retry () {
+              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
          # Ensure the working directory gets chowned back to the current user
+          retry docker pull "${ALPINE_IMAGE}"
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
-          rm -rf "${GITHUB_WORKSPACE}"
-          mkdir "${GITHUB_WORKSPACE}"
+          # Empty the workspace left behind by a previous job on this runner.
+          # The glob must stay OUTSIDE the quotes: a quoted "/*" is treated as a
+          # literal path component, so nothing would be removed. ":?" aborts the
+          # step if GITHUB_WORKSPACE is unset/empty instead of expanding to "/*".
+          # Note: a plain "*" does not match dotfiles, so entries such as .git stay.
+          rm -rf "${GITHUB_WORKSPACE:?}"/*
+          rm -f ~/.ssh/authorized_keys
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
@ -112,55 +86,15 @@ concurrency:
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
 {%- endmacro -%}

-{%- macro setup_rocm_linux() -%}
-      - name: Clean workspace
-        run: |
-          rm -rf "${GITHUB_WORKSPACE}"
-          mkdir "${GITHUB_WORKSPACE}"
-      - name: Set DOCKER_HOST
-        run: echo "DOCKER_HOST=unix:///run/user/$(id -u)/docker.sock" >> "${GITHUB_ENV}"
-      - name: Runner health check system info
-        if: always()
-        run: |
-          cat /etc/os-release || true
-          cat /etc/apt/sources.list.d/rocm.list || true
-          cat /opt/rocm/.info/version || true
-          whoami
-      - name: Runner health check rocm-smi
-        if: always()
-        run: |
-          rocm-smi
-      - name: Runner health check rocminfo
-        if: always()
-        run: |
-          rocminfo
-      - name: Runner health check GPU count
-        if: always()
-        run: |
-          ngpu=$(rocminfo | grep -c -E 'Name:.*\sgfx')
-          if [[ "x$ngpu" != "x2" && "x$ngpu" != "x4" ]]; then
-              echo "Failed to detect GPUs on the runner"
-              exit 1
-          fi
-      - name: Runner health check disconnect on failure
-        if: ${{ failure() }}
-        run: |
-          killall runsvc.sh
-      - name: Preserve github env variables for use in docker
-        run: |
-          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
-{%- endmacro -%}
-
-{%- macro teardown_ec2_linux(pytorch_directory="") -%}
+{%- macro teardown_ec2_linux() -%}
      - name: Hold runner for 2 hours or until ssh sessions have drained
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
@ -174,87 +108,21 @@ concurrency:
          docker system prune -af
 {%- endmacro -%}

-{%- macro teardown_rocm_linux() -%}
-      - name: Kill containers, clean up images
-        if: always()
-        run: |
-          # ignore expansion of "docker ps -q" since it could be empty
-          # shellcheck disable=SC2046
-          docker stop $(docker ps -q) || true
-          # Prune all of the docker images
-          docker system prune -af
-{%- endmacro -%}
-
-{%- macro checkout(submodules="recursive", deep_clone=True, directory="", repository="pytorch/pytorch", branch="") -%}
-      - name: Checkout !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }}
+{%- macro checkout_pytorch(submodules) -%}
+      - name: Checkout PyTorch
        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
        with:
-      {%- if deep_clone %}
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
-      {%- endif %}
          submodules: !{{ submodules }}
-      {%- if repository != "pytorch/pytorch" %}
-          repository: !{{ repository }}
-      {%- endif %}
-      {%- if branch %}
-          ref: !{{ branch }}
-      {%- endif %}
-      {%- if directory %}
-          path: !{{ directory }}
-      {%- endif %}
-      - name: Clean !{{ 'PyTorch' if repository == "pytorch/pytorch" else repository }} checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-      {%- if directory%}
-        working-directory: !{{ directory }}
-      {%- endif %}
 {%- endmacro -%}

-{%- macro upload_downloaded_files(name, artifact_name="", use_s3=True, when="always()") -%}
-      - name: Zip JSONs for upload
-        if: !{{ when }}
-        env:
-{%- if name == 'linux' or name == 'windows' or name == 'macos' %}
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
-{%- else %}
-          FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
-{%- endif %}
-{%- if name == 'windows' %}
-        shell: powershell
-        run: |
-          # -ir => recursive include all files in pattern
-          7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
-{%- else %}
-        run: |
-          # Remove any previous test jsons if they exist
-          rm -f test-jsons-*.zip
-          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
-{%- endif %}
-{%- if use_s3 %}
-      - uses: !{{ upload_artifact_s3_action }}
-        name: Store Test Downloaded JSONs on S3
-{%- else %}
-      - uses: actions/upload-artifact@v2
-        name: Store Test Downloaded JSONs on Github
-{%- endif %}
-        if: !{{ when }}
-        with:
-{%- if artifact_name != "" %}
-          name: !{{ artifact_name }}
-{%- endif %}
-          retention-days: 14
-          if-no-files-found: warn
-          path:
-            test-jsons-*.zip
-{%- endmacro -%}

-{%- macro upload_test_reports(name, artifact_name="", use_s3=True) -%}
+{%- macro upload_test_reports(name) -%}
      - name: Zip test reports for upload
        if: always()
        env:
-{%- if name == 'linux' or name == 'windows' or name == 'macos' %}
+{%- if name == 'linux' or name == 'windows' %}
          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
 {%- else %}
          FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
@ -270,22 +138,35 @@ concurrency:
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
 {%- endif %}
-{%- if use_s3 %}
-      - uses: !{{ upload_artifact_s3_action }}
-        name: Store Test Reports on S3
-{%- else %}
      - uses: actions/upload-artifact@v2
-        name: Store Test Reports on Github
-{%- endif %}
+        name: Store Test Reports
        if: always()
        with:
-{%- if artifact_name != "" %}
-          name: !{{ artifact_name }}
+{%- if name == 'linux' or name == 'windows' %}
+          name: test-reports-${{ matrix.config }}
+{%- else %}
+          name: test-reports-!{{ name }}
 {%- endif %}
          retention-days: 14
          if-no-files-found: error
          path:
+{%- if name == 'windows' %}
+            pytorch-${{ github.run_id }}/test-reports-*.zip
+{%- else %}
            test-reports-*.zip
+{%- endif %}
+      - uses: !{{ upload_artifact_s3_action }}
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+{%- if name == 'windows' %}
+            pytorch-${{ github.run_id }}/test-reports-*.zip
+{%- else %}
+            test-reports-*.zip
+{%- endif %}
 {%- endmacro -%}

 {%- macro render_test_results() -%}
@ -303,91 +184,3 @@ concurrency:
        run: |
          python3 tools/render_junit.py test/
 {%- endmacro -%}
-
-{%- macro calculate_docker_image(always_rebuild) -%}
-      - name: Calculate docker image tag
-        id: calculate-tag
-        run: |
-          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
-          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "::set-output name=docker_tag::${DOCKER_TAG}"
-          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
-      - name: Check if image should be built
-        id: check
-        env:
-          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
-        run: |
-          set -x
-{%- if not always_rebuild %}
-          # Check if image already exists, if it does then skip building it
-          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
-            exit 0
-          fi
-          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
-            # if we're on the base branch then use the parent commit
-            MERGE_BASE=$(git rev-parse HEAD~)
-          else
-            # otherwise we're on a PR, so use the most recent base commit
-            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
-          fi
-          # Covers the case where a previous tag doesn't exist for the tree
-          # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
-          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
-            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
-            exit 1
-          fi
-          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
-          # If no image exists but the hash is the same as the previous hash then we should error out here
-          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
-            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
-            echo "       contact the PyTorch team to restore the original images"
-            exit 1
-          fi
-{%- endif %}
-          echo ::set-output name=rebuild::yes
-      - name: Build and push docker image
-        if: ${{ steps.check.outputs.rebuild }}
-        env:
-          DOCKER_SKIP_S3_UPLOAD: 1
-        working-directory: .circleci/docker
-        run: |
-          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
-          ./build_docker.sh
-{%- endmacro -%}
-
-{%- macro setup_miniconda(python_version) -%}
-      - name: Setup miniconda
-        uses: conda-incubator/setup-miniconda@v2
-        with:
-          auto-update-conda: true
-          python-version: !{{ python_version }}
-          activate-environment: build
-{%- endmacro -%}
-
-{%- macro set_xcode_version(xcode_version) -%}
-{# Emits a DEVELOPER_DIR entry pointing at the requested Xcode install; emits nothing when xcode_version is the empty string. #}
-{%- if xcode_version != '' %}
-  # Set the Xcode version to !{{ xcode_version }}
-  DEVELOPER_DIR: /Applications/Xcode_!{{ xcode_version }}.app/Contents/Developer
-{%- endif %}
-{%- endmacro -%}
-
-{%- macro wait_and_kill_ssh_windows(pytorch_directory="") -%}
-      - name: Wait until all sessions have drained
-        shell: powershell
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
-        if: always()
-        timeout-minutes: 120
-        run: |
-          .github\scripts\wait_for_ssh_to_drain.ps1
-      - name: Kill active ssh sessions if still around (Useful if workflow was cancelled)
-        shell: powershell
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
-        if: always()
-        run: |
-          .github\scripts\kill_active_ssh_sessions.ps1
-{%- endmacro -%}
--- a/.github/templates/common_android.yml.j2
+++ b/.github/templates/common_android.yml.j2
@ -1,81 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- macro upload_androind_binary_size(build_type, artifacts) -%}
-      - name: Display and upload binary build size statistics (Click Me)
-        # temporary hack: set CIRCLE_* vars, until we update
-        # tools/stats/print_test_stats.py to natively support GitHub Actions
-        env:
-          AWS_DEFAULT_REGION: us-east-1
-          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: '${{ github.run_id }}'
-        run: |
-          # The artifact file is created inside docker container, which contains the result binaries.
-          # Now unpackage it into the project folder. The subsequent script will scan project folder
-          # to locate result binaries and report their sizes.
-          # If artifact file is not provided it assumes that the project folder has been mounted in
-          # the docker during build and already contains the result binaries, so this step can be skipped.
-          export ARTIFACTS=!{{ artifacts }}
-          if [ -n "${ARTIFACTS}" ]; then
-            tar xf "${ARTIFACTS}" -C "${GITHUB_WORKSPACE}"
-            cd "${GITHUB_WORKSPACE}"
-          fi
-          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
-          export COMMIT_TIME
-          ANDROID_BUILD_TYPE=!{{ build_type}}
-          export ANDROID_BUILD_TYPE
-          pip3 install requests==2.26 boto3==1.16.34
-          python3 -m tools.stats.upload_binary_size_to_scuba "android" || exit 0
-{%- endmacro -%}
-
-{%- macro build_android(env_name, container_suffix) -%}
-      - name: Build-!{{ container_suffix }}
-        # Starts a detached container from DOCKER_IMAGE with BUILD_ENVIRONMENT set to
-        # env_name, copies the workspace into it, runs .jenkins/pytorch/build.sh inside,
-        # copies the dist folder back, then commits the container and pushes it as
-        # "${DOCKER_IMAGE}-<container_suffix>".
-        #
-        # NOTE(review): in the find command below, -delete binds only to -name "*.o"
-        # (implicit AND binds tighter than -or), so "*.a" files are matched but never
-        # deleted; confirm whether keeping them is intentional before changing it.
-        # NOTE(review): the inner double quotes around *.a / *.o terminate the outer
-        # echo string, so the patterns reach the container shell unquoted, and
-        # ${BUILD_ROOT} is expanded on the runner rather than inside the container.
-        # NOTE(review): the "#!/bin/bash -eo pipefail" line is not the first line of
-        # the script, so it is only a comment and does not enable -e / pipefail.
-        env:
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        run: |
-          # detached container should get cleaned up by teardown_ec2_linux
-          #!/bin/bash -eo pipefail
-          # Pull Docker image and run build
-          time docker pull "${DOCKER_IMAGE}" >/dev/null
-          echo "${DOCKER_IMAGE}"
-          export container_name
-          container_name=$(docker run \
-            -e BUILD_ENVIRONMENT=!{{ env_name }} \
-            -e JOB_BASE_NAME \
-            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
-            -e IS_GHA \
-            -e PR_NUMBER \
-            -e SHA1 \
-            -e BRANCH \
-            -e GITHUB_RUN_ID \
-            -e SCCACHE_BUCKET \
-            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-            -e SKIP_SCCACHE_INITIALIZATION=1 \
-            -e TORCH_CUDA_ARCH_LIST \
-            -e PR_LABELS \
-            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --security-opt seccomp=unconfined \
-            --cap-add=SYS_PTRACE \
-            --tty \
-            --detach \
-            --user jenkins \
-            -w /var/lib/jenkins/workspace \
-            "${DOCKER_IMAGE}"
-          )
-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
-          docker cp "${GITHUB_WORKSPACE}/." "${container_name}:/var/lib/jenkins/workspace"
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins . && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "${container_name}" bash) 2>&1
-
-          # Copy dist folder back
-          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-!{{ container_suffix }}
-          docker cp "${container_name}:/var/lib/jenkins/workspace/dist" "${GITHUB_WORKSPACE}/." || echo "Dist folder not found"
-          docker commit "${container_name}" "${COMMIT_DOCKER_IMAGE}"
-          time docker push "${COMMIT_DOCKER_IMAGE}"
-{%- endmacro -%}
--- a/.github/templates/docker_builds_ci_workflow.yml.j2
+++ b/.github/templates/docker_builds_ci_workflow.yml.j2
@ -1,60 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/docker_builds_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-  workflow_dispatch:
-  pull_request:
-    types: [opened, synchronize, reopened]
-    paths:
-      - '.circleci/docker/**'
-      - '.github/workflows/generated-docker-builds.yml'
-{%- if is_scheduled %}
-  schedule:
-    - cron: !{{ is_scheduled }}
-{%- endif %}
-!{{ common.concurrency(build_environment) }}
-
-env:
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  AWS_DEFAULT_REGION: us-east-1
-
-jobs:
-{% block docker_build +%}
-  docker-build:
-    runs-on: linux.2xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
-    strategy:
-      matrix:
-        include:
-          {%- for docker_image in docker_images %}
-            - docker_image_base: '!{{ docker_image }}'
-              docker_image_short_name: '!{{ docker_image.split('/')[-1] }}'
-          {%- endfor %}
-    env:
-      DOCKER_IMAGE_BASE: '${{ matrix.docker_image_base }}'
-    name: docker-build (${{ matrix.docker_image_short_name }})
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout() }}
-      !{{ common.calculate_docker_image(true) }}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      !{{ common.parse_ref() }}
-      !{{ common.teardown_ec2_linux() }}
-      - name: Hold runner for 2 hours or until ssh sessions have drained
-        # Always hold for active ssh sessions
-        if: always()
-        run: .github/scripts/wait_for_ssh_to_drain.sh
-      - name: Clean up docker images
-        if: always()
-        run: |
-          # Prune all of the docker images
-          docker system prune -af
-{%- endblock %}
--- a/.github/templates/ios_ci_workflow.yml.j2
+++ b/.github/templates/ios_ci_workflow.yml.j2
@ -1,183 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/ios_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-{%- if is_default %}
-  pull_request:
-{%- endif -%}
-
-{%- if is_scheduled %}
-  schedule:
-    - cron: !{{ is_scheduled }}
-{%- else %}
-  push:
-    branches:
-      - master
-      - release/*
-{%- endif %}
-{%- for label in ciflow_config.labels | sort %}
-  {%- if loop.first %}
-    tags:
-  {%- endif %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-  workflow_dispatch:
-
-env:
-  BUILD_ENVIRONMENT: !{{ build_environment }}
-  IN_CI: 1
-  IS_GHA: 1
-  IOS_PLATFORM: !{{ ios_platform }}
-  IOS_ARCH: !{{ ios_arch }}
-!{{ common.set_xcode_version(xcode_version) }}
-
-jobs:
-{% block build +%}
-  build:
-    # NOTE: These builds will not run successfully without running on `pytorch/pytorch` due to the limitations
-    #       of accessing secrets from forked pull requests and IOS' dependency on secrets for their build/test
-    if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
-    runs-on: macos-10.15
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build
-      IOS_CERT_KEY_2022: ${{ secrets.IOS_CERT_KEY_2022 }}
-      IOS_CERT_SECRET: ${{ secrets.IOS_CERT_SECRET }}
-      IOS_DEV_TEAM_ID: ${{ secrets.IOS_DEV_TEAM_ID }}
-      IOS_SIGN_KEY_2022: ${{ secrets.IOS_SIGN_KEY_2022 }}
-      PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.checkout() }}
-      - name: Populate CI build options
-        run: |
-          # Most builds use the lite interpreter, if certain builds shouldn't
-          # build the lite interpreter this env variable should get over-written
-          # in the following case statement
-          echo "BUILD_LITE_INTERPRETER=1" >> "${GITHUB_ENV}"
-
-          case ${BUILD_ENVIRONMENT} in
-            *metal*)
-              echo "USE_PYTORCH_METAL=1" >> "${GITHUB_ENV}"
-              ;;
-            *full_jit*)
-              echo "BUILD_LITE_INTERPRETER=0" >> "${GITHUB_ENV}"
-              ;;
-            *custom*)
-              echo "SELECTED_OP_LIST=${GITHUB_WORKSPACE}/ios/TestApp/custom_build/mobilenetv2.yaml" >> "${GITHUB_ENV}"
-              ;;
-            *coreml*)
-              echo "USE_COREML_DELEGATE=1" >> "${GITHUB_ENV}"
-              ;;
-          esac
-      - name: Install brew dependencies
-        run: |
-          # Install dependencies
-          brew install libtool
-      - name: Install conda and dependencies
-        run: |
-          # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on
-          curl --retry 3 -o "${RUNNER_TEMP}/conda.sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
-          chmod +x "${RUNNER_TEMP}/conda.sh"
-          /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda"
-          echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}"
-          # shellcheck disable=SC1091
-          source "${RUNNER_TEMP}/anaconda/bin/activate"
-          conda install -y \
-            cffi \
-            cmake \
-            mkl \
-            mkl-include \
-            ninja \
-            numpy \
-            pyyaml \
-            requests \
-            setuptools \
-            typing_extensions
-      - name: Run Fastlane
-        run: |
-          set -x
-          cd ios/TestApp
-          # install fastlane
-          sudo gem install bundler && bundle install
-          # install certificates
-          echo "${IOS_CERT_KEY_2022}" >> cert.txt
-          base64 --decode cert.txt -o Certificates.p12
-          rm cert.txt
-          bundle exec fastlane install_root_cert
-          bundle exec fastlane install_dev_cert
-          # install the provisioning profile
-          PROFILE=PyTorch_CI_2022.mobileprovision
-          PROVISIONING_PROFILES=~/Library/MobileDevice/Provisioning\ Profiles
-          mkdir -pv "${PROVISIONING_PROFILES}"
-          cd "${PROVISIONING_PROFILES}"
-          echo "${IOS_SIGN_KEY_2022}" >> cert.txt
-          base64 --decode cert.txt -o ${PROFILE}
-          rm cert.txt
-      - name: Build
-        run: |
-          # shellcheck disable=SC1091
-          source "${RUNNER_TEMP}/anaconda/bin/activate"
-          export TCLLIBPATH="/usr/local/lib"
-          python -VV
-          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}
-          scripts/build_ios.sh
-      - name: Run Build Test
-        run: |
-          PROFILE=PyTorch_CI_2022
-          # run the ruby build script
-          if ! [ -x "$(command -v xcodebuild)" ]; then
-            echo 'Error: xcodebuild is not installed.'
-            exit 1
-          fi
-          if [ "${IOS_PLATFORM}" != "SIMULATOR" ]; then
-            ruby scripts/xcode_build.rb -i build_ios/install -x ios/TestApp/TestApp.xcodeproj -p "${IOS_PLATFORM}" -c "${PROFILE}" -t "${IOS_DEV_TEAM_ID}"
-          else
-            ruby scripts/xcode_build.rb -i build_ios/install -x ios/TestApp/TestApp.xcodeproj -p "${IOS_PLATFORM}"
-          fi
-{%- if ios_platform == "SIMULATOR" %}
-      - name: Run Simulator Tests
-        run: |
-          # shellcheck disable=SC1091
-          source "${RUNNER_TEMP}/anaconda/bin/activate"
-          pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-          # generate models for differnet backends
-          cd "${GITHUB_WORKSPACE}/ios/TestApp/benchmark"
-          mkdir -p ../models
-          if [ "${USE_COREML_DELEGATE}" == 1 ]; then
-            pip install coremltools==5.0b5
-            pip install six==1.16.0
-            python coreml_backend.py
-          else
-            python trace_model.py
-          fi
-          if [ "${BUILD_LITE_INTERPRETER}" == 1 ]; then
-            echo "Setting up the TestApp for LiteInterpreter"
-            ruby setup.rb --lite 1
-          else
-            echo "Setting up the TestApp for Full JIT"
-            ruby setup.rb
-          fi
-          cd "${GITHUB_WORKSPACE}/ios/TestApp"
-          instruments -s -devices
-          if [ "${BUILD_LITE_INTERPRETER}" == 1 ]; then
-            if [ "${USE_COREML_DELEGATE}" == 1 ]; then
-              fastlane scan --only_testing TestAppTests/TestAppTests/testCoreML
-            else
-              fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
-            fi
-          else
-            fastlane scan --only_testing TestAppTests/TestAppTests/testFullJIT
-          fi
-{%- endif -%}
-{% endblock +%}
-
-!{{ common.concurrency(build_environment) }}
--- a/.github/templates/linux_binary_build_workflow.yml.j2
+++ b/.github/templates/linux_binary_build_workflow.yml.j2
@ -1,245 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/linux_binary_build_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-{%- macro binary_env(config) -%}
-    env:
-      PACKAGE_TYPE: !{{ config["package_type"] }}
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: !{{ config["desired_cuda"] }}
-{%- if config["gpu_arch_version"] %}
-      GPU_ARCH_VERSION: !{{ config["gpu_arch_version"] }}
-{%- endif %}
-      GPU_ARCH_TYPE: !{{ config["gpu_arch_type"] }}
-      DOCKER_IMAGE: !{{ config["container_image"] }}
-      SKIP_ALL_TESTS: 1
-{%- if config["package_type"] == "libtorch" %}
-      LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
-      DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
-{%- else %}
-      DESIRED_PYTHON: "!{{ config["python_version"] }}"
-{%- endif %}
-{%- endmacro %}
-
-on:
-  push:
-    # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build
-    branches:
-      - nightly
-    tags:
-      # NOTE: Binary build pipelines should only get triggered on release candidate builds
-      # Release candidate tags look like: v1.11.0-rc1
-      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
-{%- for label in ciflow_config.labels | sort %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-  workflow_dispatch:
-
-env:
-  # Needed for conda builds
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  ANACONDA_USER: pytorch
-  AWS_DEFAULT_REGION: us-east-1
-  BINARY_ENV_FILE: /tmp/env
-  BUILD_ENVIRONMENT: !{{ build_environment }}
-  BUILDER_ROOT: /builder
-  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  IN_CI: 1
-  IS_GHA: 1
-  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
-  PYTORCH_RETRY_TEST_CASES: 1
-  PYTORCH_ROOT: /pytorch
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  SKIP_ALL_TESTS: 1
-!{{ common.concurrency(build_environment) }}
-
-jobs:
-{%- for config in build_configs %}
-  !{{ config["build_name"] }}-build:
-    if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: linux.4xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout(deep_clone=False, directory="pytorch") }}
-      !{{ common.checkout(deep_clone=False, directory="builder", repository="pytorch/builder", branch="release/1.11") }}
-{%- if config["gpu_arch_type"] == 'cuda' and config["gpu_arch_version"].startswith('11') %}
-      - name: Set BUILD_SPLIT_CUDA
-        run: |
-          echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV"
-{%- endif %}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Build PyTorch binary
-        run: |
-          set -x
-          mkdir -p artifacts/
-          container_name=$(docker run \
-            -e BINARY_ENV_FILE \
-            -e BUILDER_ROOT \
-            -e BUILD_ENVIRONMENT \
-            -e BUILD_SPLIT_CUDA \
-            -e DESIRED_CUDA \
-            -e DESIRED_DEVTOOLSET \
-            -e DESIRED_PYTHON \
-            -e GPU_ARCH_TYPE \
-            -e GPU_ARCH_VERSION \
-            -e IS_GHA \
-            -e LIBTORCH_VARIANT \
-            -e PACKAGE_TYPE \
-            -e PYTORCH_FINAL_PACKAGE_DIR \
-            -e PYTORCH_ROOT \
-            -e SKIP_ALL_TESTS \
-            --tty \
-            --detach \
-            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
-            -v "${GITHUB_WORKSPACE}/builder:/builder" \
-            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
-            -w / \
-            "${DOCKER_IMAGE}"
-          )
-          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
-          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/!{{ config["package_type"] }}/build.sh"
-      !{{ common.chown_dir("${RUNNER_TEMP}/artifacts") }}
-      - uses: !{{ common.upload_artifact_s3_action }}
-        with:
-          name: !{{ config["build_name"] }}
-          retention-days: 14
-          if-no-files-found: error
-          path:
-            ${{ runner.temp }}/artifacts/*
-      !{{ common.teardown_ec2_linux("pytorch/") }}
-  !{{ config["build_name"] }}-test:  # Testing
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs: !{{ config["build_name"] }}-build
-{%- if config["gpu_arch_type"] == "cuda" %}
-    runs-on: linux.4xlarge.nvidia.gpu
-{%- else %}
-    runs-on: linux.4xlarge
-{%- endif %}
-    timeout-minutes: !{{ common.timeout_minutes }}
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
-        name: Download Build Artifacts
-        with:
-          name: !{{ config["build_name"] }}
-          path: "${{ runner.temp }}/artifacts/"
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: pytorch
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: builder
-{%- if config["gpu_arch_type"] == "cuda" %}
-      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        working-directory: pytorch/
-        run: |
-          bash .github/scripts/install_nvidia_utils_linux.sh
-          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
-{%- endif %}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Test PyTorch binary
-        run: |
-          set -x
-          # shellcheck disable=SC2086,SC2090
-          container_name=$(docker run \
-            ${GPU_FLAG:-} \
-            -e BINARY_ENV_FILE \
-            -e BUILDER_ROOT \
-            -e BUILD_ENVIRONMENT \
-            -e BUILD_SPLIT_CUDA \
-            -e DESIRED_CUDA \
-            -e DESIRED_DEVTOOLSET \
-            -e DESIRED_PYTHON \
-            -e GPU_ARCH_TYPE \
-            -e GPU_ARCH_VERSION \
-            -e IS_GHA \
-            -e LIBTORCH_VARIANT \
-            -e PACKAGE_TYPE \
-            -e PYTORCH_FINAL_PACKAGE_DIR \
-            -e PYTORCH_ROOT \
-            -e SKIP_ALL_TESTS \
-            --tty \
-            --detach \
-            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
-            -v "${GITHUB_WORKSPACE}/builder:/builder" \
-            -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
-            -w / \
-            "${DOCKER_IMAGE}"
-          )
-          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
-          # Generate test script
-          docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
-          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
-      !{{ common.teardown_ec2_linux("pytorch/") }}
-  !{{ config["build_name"] }}-upload:  # Uploading
-    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs: !{{ config["build_name"] }}-test
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
-        name: Download Build Artifacts
-        with:
-          name: !{{ config["build_name"] }}
-          path: "${{ runner.temp }}/artifacts/"
-      - name: Set DRY_RUN (only for tagged pushes)
-        if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/nightly' || startsWith(github.event.ref, 'refs/tags/'))}}
-        run: |
-          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
-      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
-        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
-        run: |
-          # reference ends with an RC suffix
-          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
-            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
-          fi
-      - name: Upload binaries
-        env:
-          PKG_DIR: "${{ runner.temp }}/artifacts"
-          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
-          # When running these on pull_request events these should be blank
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
-          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
-        run: |
-          docker run --rm -i \
-            -e ANACONDA_API_TOKEN \
-            -e AWS_ACCESS_KEY_ID \
-            -e AWS_SECRET_ACCESS_KEY \
-            -e DRY_RUN \
-            -e PACKAGE_TYPE \
-            -e PKG_DIR=/artifacts \
-            -e UPLOAD_CHANNEL \
-            -e UPLOAD_SUBFOLDER \
-            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
-            -v "${GITHUB_WORKSPACE}:/v" \
-            -w /v \
-            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
-            bash -c '.circleci/scripts/binary_upload.sh'
-      !{{ common.teardown_ec2_linux() }}
-{%- endfor %}
--- a/.github/templates/linux_ci_workflow.yml.j2
+++ b/.github/templates/linux_ci_workflow.yml.j2
@ -7,32 +7,27 @@ name: !{{ build_environment }}
 {%- endblock %}

 on:
-{%- if is_default %}
+{%- if on_pull_request %}
  pull_request:
+  {%- if ciflow_config.enabled %}
+    {%- if ciflow_config.trigger_action_only %}
+    types: [!{{ ciflow_config.trigger_action }}]
+    {%- else %}
+    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
+    {%- endif %}
+  {%- endif %}
+{%- else %}
+  # TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
 {%- endif %}
+
+{%- if is_scheduled %}
+  schedule:
+    - cron: !{{ is_scheduled }}
+{%- else %}
  push:
-{%- if enable_doc_jobs and is_scheduled %}
-    tags:
-      # NOTE: Binary build pipelines should only get triggered on release candidate builds
-      # Release candidate tags look like: v1.11.0-rc1
-      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
-{%- endif %}
-{%- for label in ciflow_config.labels | sort %}
-  {%- if loop.first and not (enable_doc_jobs  and is_scheduled) %}
-    tags:
-  {%- endif %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-{%- if not is_scheduled and not only_on_pr %}
    branches:
      - master
      - release/*
-{%- endif %}
-{%- if is_scheduled and not only_on_pr %}
-  schedule:
-    - cron: !{{ is_scheduled }}
 {%- endif %}
  workflow_dispatch:

@ -43,7 +38,6 @@ env:
  XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
  TORCH_CUDA_ARCH_LIST: 5.2
  IN_CI: 1
-  IS_GHA: 1
  # This is used for the phase of adding wheel tests only, will be removed once completed
  IN_WHEEL_TEST: 1
  # Used for custom_operator, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
@ -51,71 +45,102 @@ env:
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  AWS_DEFAULT_REGION: us-east-1
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  PYTORCH_RETRY_TEST_CASES: 1
-{%- if enable_xla_test == 1 %}
-  # This is used for XLA tests only
-  XLA_CUDA: 0
-  XLA_IMAGE_TAG: v0.2
-{%- endif %}
-{%- if build_with_debug %}
-  DEBUG: 1
-{%- endif %}
+
 !{{ common.concurrency(build_environment) }}

 jobs:
-{% block build +%}
-  build:
-    runs-on: linux.2xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
+{%- if ciflow_config.enabled %}
+  !{{ ciflow_config.root_job_name }}:
+    runs-on: ubuntu-18.04
+    if: ${{ !{{ ciflow_config.root_job_condition }} }}
    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build
+      LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
+    steps:
+      - name: noop
+        run: echo running !{{ ciflow_config.root_job_name }}
+      - name: print labels
+        run: echo "${LABELS}"
+{%- endif %}
+  calculate-docker-image:
+    runs-on: linux.2xlarge
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
+    env:
+      DOCKER_BUILDKIT: 1
+    timeout-minutes: 90
    outputs:
      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout() }}
-    {%- if enable_xla_test == 1 %}
+      !{{ common.checkout_pytorch("false") }}
      - name: Calculate docker image tag
        id: calculate-tag
        run: |
-          echo "XLA workflow uses pre-built test image at ${XLA_IMAGE_TAG}"
          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
-          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${XLA_IMAGE_TAG}" >> "${GITHUB_ENV}"
          echo "::set-output name=docker_tag::${DOCKER_TAG}"
-          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${XLA_IMAGE_TAG}"
-    {%- else %}
-      !{{ common.calculate_docker_image(false) }}
-    {%- endif %}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      !{{ common.parse_ref() }}
-      - name: Build
+          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
+      - name: Check if image should be built
+        id: check
        env:
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
+          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
+        run: |
+          set -x
+          # Check if image already exists, if it does then skip building it
+          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
+            exit 0
+          fi
+          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
+            # if we're on the base branch then use the parent commit
+            MERGE_BASE=$(git rev-parse HEAD~)
+          else
+            # otherwise we're on a PR, so use the most recent base commit
+            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
+          fi
+          # Covers the case where a previous tag doesn't exist for the tree
+          # this is only really applicable on trees that don't have `.circleci/docker` at their merge base, i.e. nightly
+          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
+            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
+            exit 1
+          fi
+          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
+          # If no image exists but the hash is the same as the previous hash then we should error out here
+          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
+            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
+            echo "       contact the PyTorch team to restore the original images"
+            exit 1
+          fi
+          echo ::set-output name=rebuild::yes
+      - name: Build and push docker image
+        if: ${{ steps.check.outputs.rebuild }}
+        env:
+          DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
+          DOCKER_SKIP_S3_UPLOAD: 1
+        run: |
+          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
+          cd .circleci/docker && ./build_docker.sh
+{% block build +%}
+  build:
+    runs-on: linux.2xlarge
+    needs: [calculate-docker-image, !{{ ciflow_config.root_job_name }}]
+    env:
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
+      JOB_BASE_NAME: !{{ build_environment }}-build
+    steps:
+      !{{ common.setup_ec2_linux() }}
+      !{{ common.checkout_pytorch("recursive") }}
+      - name: Pull docker image
+        run: |
+          docker pull "${DOCKER_IMAGE}"
+      - name: Build
        run: |
          # detached container should get cleaned up by teardown_ec2_linux
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e JOB_BASE_NAME \
            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
-            -e IS_GHA \
-            -e PR_NUMBER \
-            -e SHA1 \
-            -e BRANCH \
-            -e GITHUB_RUN_ID \
            -e SCCACHE_BUCKET \
-          {%- if enable_xla_test == 1 %}
-            -e XLA_CUDA \
-          {%- endif %}
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
@ -133,14 +158,19 @@ jobs:
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
+      !{{ common.parse_ref() }}
      - name: Display and upload binary build size statistics (Click Me)
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
+          AWS_DEFAULT_REGION: us-east-1
+          IS_GHA: 1
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: '${{ github.run_id }}'
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
        run: |
          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
          export COMMIT_TIME
@ -150,7 +180,7 @@ jobs:
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      {%- if build_generates_artifacts %}
+      {%- if not is_libtorch %}
      - name: Archive artifacts into zip
        run: |
          zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
@ -177,14 +207,14 @@ jobs:
 {%- if not exclude_test %}
 {% block test +%}
  generate-test-matrix:
-    needs: build
    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
    env:
      TEST_RUNNER_TYPE: !{{ test_runner_type }}
      ENABLE_DISTRIBUTED_TEST: !{{ enable_distributed_test }}
      ENABLE_JIT_LEGACY_TEST: !{{ enable_jit_legacy_test }}
-      ENABLE_FX2TRT_TEST: !{{ enable_fx2trt_test }}
      ENABLE_MULTIGPU_TEST: !{{ enable_multigpu_test }}
      ENABLE_NOGPU_NO_AVX_TEST: !{{ enable_nogpu_no_avx_test }}
      ENABLE_NOGPU_NO_AVX2_TEST: !{{ enable_nogpu_no_avx2_test }}
@ -194,8 +224,7 @@ jobs:
      ENABLE_XLA_TEST: !{{ enable_xla_test }}
      ENABLE_NOARCH_TEST: !{{ enable_noarch_test }}
      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      MULTIGPU_RUNNER_TYPE: !{{ multigpu_runner_type }}
-      DISTRIBUTED_GPU_RUNNER_TYPE: !{{ distributed_gpu_runner_type }}
+      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
      NOGPU_RUNNER_TYPE: linux.2xlarge
      PR_BODY: ${{ github.event.pull_request.body }}
    outputs:
@ -214,42 +243,30 @@ jobs:
        run: .github/scripts/generate_pytorch_test_matrix.py

  test:
-    needs: [build, generate-test-matrix]
+    needs: [calculate-docker-image, build, generate-test-matrix, !{{ ciflow_config.root_job_name }}]
    strategy:
      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
-    timeout-minutes: !{{ common.timeout_minutes }}
    env:
-      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
      JOB_BASE_NAME: !{{ build_environment }}-test
      TEST_CONFIG: ${{ matrix.config }}
      SHARD_NUMBER: ${{ matrix.shard }}
      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
      PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
+      CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
    steps:
-{%- if 'rocm' in test_runner_type %}
-      !{{ common.setup_rocm_linux() }}
-{%- else %}
      !{{ common.setup_ec2_linux() }}
-{%- endif %}
-      !{{ common.checkout() }}
-      - name: Pull Docker image
+      !{{ common.checkout_pytorch("recursive") }}
+      - name: Pull docker image
        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-{%- if 'rocm' in test_runner_type %}
-      - name: ROCm set GPU_FLAG
-        if: ${{ contains(env.BUILD_ENVIRONMENT, 'rocm') && !contains(matrix.config, 'nogpu') }}
-        run: |
-          echo "GPU_FLAG=--device=/dev/mem --device=/dev/kfd --device=/dev/dri --group-add video --group-add daemon" >> "${GITHUB_ENV}"
-{%- else %}
+          docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
        run: |
          bash .github/scripts/install_nvidia_utils_linux.sh
          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
-{%- endif %}
      - name: Determine shm-size
        run: |
          shm_size="1g"
@ -271,42 +288,29 @@ jobs:
          unzip -o artifacts.zip
      - name: Output disk space left
        run: |
-{%- if 'rocm' in test_runner_type %}
-          df -H
-{%- else %}
          sudo df -H
-{%- endif %}
      !{{ common.parse_ref() }}
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        # Time out the test phase after !{{ timeout_after }} minutes
-        timeout-minutes: !{{ timeout_after }}
+          IS_GHA: 1
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          AWS_DEFAULT_REGION: us-east-1
        run: |
-          set -x
-
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
-          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
-            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
-{%- if 'rocm' not in test_runner_type %}
-          PROXY_ENV=
-          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
-          #       We should investigate whether or not there's a list of hostnames we can add to no_proxy to
-          #       make it so that we shouldn't have to fully disable squid for XLA tests
-          if [[ $TEST_CONFIG != 'xla' ]]; then
-            # shellcheck disable=SC2089
-            PROXY_ENV="-e http_proxy=!{{ common.squid_proxy }} -e https_proxy=!{{ common.squid_proxy }} -e no_proxy=!{{ common.squid_no_proxy }}"
+          if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
+            export SHARD_NUMBER=0
          fi
-{%- endif %}
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
-          # shellcheck disable=SC2086,SC2090
+          # shellcheck disable=SC2086
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
@ -315,8 +319,9 @@ jobs:
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
-            -e BRANCH \
-            -e SHA1 \
+            -e CIRCLE_BRANCH \
+            -e CIRCLE_SHA1 \
+            -e CIRCLE_PR_NUMBER \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
@ -324,24 +329,15 @@ jobs:
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PYTORCH_IGNORE_DISABLED_ISSUES \
-            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
+            -e CONTINUE_THROUGH_ERROR \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
-          {%- if enable_xla_test == 1 %}
-            -e XLA_CUDA \
-          {%- endif %}
+            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-{%- if 'rocm' not in test_runner_type %}
-            ${PROXY_ENV} \
-{%- endif %}
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
-{%- if 'rocm' not in test_runner_type %}
-            --ipc=host \
-{%- endif %}
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
@ -351,56 +347,40 @@ jobs:
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
-{%- if 'rocm' in test_runner_type %}
-          # jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
-          docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
-          # copy test results back to the mounted workspace, needed sudo, resulting permissions were correct
-          docker exec -t "${container_name}" sh -c "cd ../pytorch && sudo cp -R test/test-reports ../workspace/test"
-{%- else %}
          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
-{%- endif %}
-{%- if 'rocm' not in test_runner_type %}
      - name: Chown workspace
        if: always()
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-{%- endif %}
      !{{ common.render_test_results() }}
-{%- if 'rocm' in test_runner_type %}
-      !{{ common.upload_downloaded_files(name='linux', use_s3=False) }}
-      !{{ common.upload_test_reports(name='linux', artifact_name="test-reports", use_s3=False) }}
-{%- else %}
-      !{{ common.upload_downloaded_files(name='linux') }}
+      {%- if is_coverage %}
+      - name: Report coverage
+        run: |
+          python3 -mpip install codecov==2.1.12
+          python3 -mcodecov
+      {%- endif %}
      !{{ common.upload_test_reports(name='linux') }}
-{%- endif %}
      !{{ common.upload_test_statistics(build_environment) }}
-{%- if 'rocm' in test_runner_type %}
-      !{{ common.teardown_rocm_linux() }}
-{%- else %}
      !{{ common.teardown_ec2_linux() }}
-{%- endif %}
 {% endblock %}
 {%- endif -%}
 {%- if enable_doc_jobs %}
  build-docs:
    runs-on: linux.2xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
    strategy:
      matrix:
        docs_type: [cpp, python]
-    needs: [build]
+    needs: [calculate-docker-image, build, !{{ ciflow_config.root_job_name }}]
    env:
-      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
      DOCS_TYPE: ${{ matrix.docs_type }}
-      WITH_PUSH: ${{ github.event_name == 'schedule' || startsWith(github.event.ref, 'refs/tags/v') }}
    steps:
      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout() }}
-      - name: Pull Docker image
+      !{{ common.checkout_pytorch("recursive") }}
+      - name: Pull docker image
        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
+          docker pull "${DOCKER_IMAGE}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
@ -408,47 +388,29 @@ jobs:
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
-{%- if is_scheduled %}
-      - name: Generate netrc (only for docs-push)
-        if: ${{ github.event_name == 'schedule' || startsWith(github.event.ref, 'refs/tags/v') }}
-        env:
-          GITHUB_PYTORCHBOT_TOKEN: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
-        run: |
-          # set credentials for https pushing
-          echo "machine github.com" > "${RUNNER_TEMP}/.netrc"
-          echo "login pytorchbot" >> "${RUNNER_TEMP}/.netrc"
-          echo "password ${GITHUB_PYTORCHBOT_TOKEN}" >> "${RUNNER_TEMP}/.netrc"
-{%- endif %}
      - name: Build ${{ matrix.docs_type }} docs
        run: |
          set -ex
          time docker pull "${DOCKER_IMAGE}" > /dev/null
-          # Convert refs/tags/v1.12.0rc3 into 1.12
-          if [[ "${GITHUB_REF}" =~ ^refs/tags/v([0-9]+\.[0-9]+)\.* ]]; then
-            target="${BASH_REMATCH[1]}"
-          else
-            target="master"
-          fi
+          echo "${GITHUB_REF}"
+          ref=${GITHUB_REF##*/}
+          target=${ref//v}
          # detached container should get cleaned up by teardown_ec2_linux
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e IN_CI \
            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e SHA1="$GITHUB_SHA" \
+            -e CIRCLE_SHA1="$GITHUB_SHA" \
            -e DOCS_VERSION="${target}" \
            -e DOCS_TYPE \
            -e PR_LABELS \
-            -e WITH_PUSH \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
            --user jenkins \
-{%- if is_scheduled %}
-            -v "${RUNNER_TEMP}/.netrc":/var/lib/jenkins/.netrc \
-{%- endif %}
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
@ -465,7 +427,7 @@ jobs:
          retention-days: 14
          s3-bucket: doc-previews
          if-no-files-found: error
-          path: pytorch.github.io/docs/master/
+          path: pytorch.github.io/docs/merge/
          s3-prefix: pytorch/${{ github.event.pull_request.number }}
      - uses: !{{ common.upload_artifact_s3_action }}
        name: Upload C++ Docs Preview
@ -476,4 +438,14 @@ jobs:
          s3-bucket: doc-previews
          path: cppdocs/
          s3-prefix: pytorch/${{ github.event.pull_request.number }}/cppdocs
+      - name: Archive artifacts into zip
+        run: |
+          zip -r "docs_${DOCS_TYPE}.zip" "${GITHUB_WORKSPACE}/pytorch.github.io" "${GITHUB_WORKSPACE}/cppdocs"
+      - uses: actions/upload-artifact@v2
+        name: Store PyTorch Build Artifacts
+        with:
+          name: docs_${{ matrix.docs_type }}
+          path: docs_${{ matrix.docs_type }}.zip
+          if-no-files-found: error
+      !{{ common.teardown_ec2_linux() }}
 {%- endif -%}
--- a/.github/templates/macos_ci_workflow.yml.j2
+++ b/.github/templates/macos_ci_workflow.yml.j2
@ -1,154 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/macos_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-{%- if is_default -%}
-  pull_request:
-{%- endif -%}
-
-{%- if is_scheduled %}
-  schedule:
-    - cron: !{{ is_scheduled }}
-{%- else %}
-  push:
-    branches:
-      - master
-      - release/*
-{%- endif %}
-{%- for label in ciflow_config.labels | sort %}
-  {%- if loop.first %}
-    tags:
-  {%- endif %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-  workflow_dispatch:
-
-# For setup-miniconda, see https://github.com/conda-incubator/setup-miniconda/issues/179
-defaults:
-  run:
-    shell: bash -e -l {0}
-env:
-  BUILD_ENVIRONMENT: !{{ build_environment }}
-  COMPACT_JOB_NAME: !{{ build_environment }}
-  IN_CI: 1
-  IS_GHA: 1
-  PYTORCH_RETRY_TEST_CASES: 1
-!{{ common.set_xcode_version(xcode_version) }}
-
-jobs:
-{% block build +%}
-  build:
-    runs-on: !{{ test_runner_type }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}
-      # For sccache access (only on non-forked PRs)
-      AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}
-      PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.checkout() }}
-      !{{ common.setup_miniconda("3.8") }}
-      - name: Install macOS homebrew dependencies
-        run: |
-          # Install dependencies
-          brew install libomp
-      - name: Install sccache (only for non-forked PRs, and pushes to trunk)
-        if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
-        run: |
-          sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-          sudo chmod +x /usr/local/bin/sccache
-          echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
-      - name: Build
-        run: |
-          echo "CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}" >> "${GITHUB_ENV}"
-          .jenkins/pytorch/macos-build.sh
-{%- if build_generates_artifacts %}
-      - name: Archive artifacts into zip
-        run: |
-          zip -1 -r artifacts.zip dist/
-      - uses: actions/upload-artifact@v2
-        name: Store PyTorch Build Artifacts on GHA
-        with:
-          name: ${{ env.BUILD_ENVIRONMENT }}
-          retention-days: 14
-          if-no-files-found: error
-          path:
-            artifacts.zip
-{%- endif %}
-{% endblock +%}
-{%- if not exclude_test %}
-{% block test +%}
-  generate-test-matrix:
-    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      TEST_RUNNER_TYPE: !{{ test_runner_type }}
-      ENABLE_DISTRIBUTED_TEST: !{{ enable_distributed_test }}
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      PR_BODY: ${{ github.event.pull_request.body }}
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-      ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
-      PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
-    steps:
-      !{{ common.checkout(submodules="false") }}
-      - uses: actions/download-artifact@v2
-        name: Download PyTorch Build Artifacts from GHA
-        with:
-          name: ${{ env.BUILD_ENVIRONMENT }}
-          path: .
-      - name: Unzip artifacts
-        run: |
-          unzip -o artifacts.zip
-      !{{ common.setup_miniconda("3.8") }}
-      - name: Install macOS homebrew dependencies
-        run: |
-          # Install dependencies
-          brew install libomp
-      !{{ common.parse_ref() }}
-      - name: Test
-        run: |
-          python3 -mpip install dist/*.whl
-          .jenkins/pytorch/macos-test.sh
-      !{{ common.render_test_results() }}
-      !{{ common.upload_downloaded_files(name='macos', artifact_name="test-jsons", use_s3=False) }}
-      !{{ common.upload_test_reports("macos", artifact_name="test-reports", use_s3=False) }}
-      !{{ common.upload_test_statistics(build_environment) }}
-{% endblock +%}
-{%- endif %}
-
-!{{ common.concurrency(build_environment) }}
--- a/.github/templates/windows_binary_build_workflow.yml.j2
+++ b/.github/templates/windows_binary_build_workflow.yml.j2
@ -1,205 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/windows_binary_build_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-{%- macro binary_env(config) -%}
-    env:
-      PYTORCH_ROOT: ${{ github.workspace }}/pytorch
-      BUILDER_ROOT: ${{ github.workspace }}/builder
-      PACKAGE_TYPE: !{{ config["package_type"] }}
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: !{{ config["desired_cuda"] }}
-{%- if config["gpu_arch_version"] %}
-      GPU_ARCH_VERSION: !{{ config["gpu_arch_version"] }}
-{%- endif %}
-      GPU_ARCH_TYPE: !{{ config["gpu_arch_type"] }}
-      SKIP_ALL_TESTS: 1
-{%- if config["package_type"] == "libtorch" %}
-{%- if config["libtorch_config"] %}
-      LIBTORCH_CONFIG: !{{ config["libtorch_config"] }}
-{%- endif %}
-      LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
-{%- if config["devtoolset"] %}
-      DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
-{%- endif %}
-      # This is a dummy value for libtorch to work correctly with our batch scripts
-      # without this value pip does not get installed for some reason
-      DESIRED_PYTHON: "3.7"
-{%- else %}
-      DESIRED_PYTHON: "!{{ config["python_version"] }}"
-{%- endif %}
-{%- endmacro %}
-
-{%- macro set_runner_specific_vars() -%}
-      # NOTE: These environment variables are put here so that they can be applied on every job equally
-      #       They are also here because setting them at a workflow level doesn't give us access to the
-      #       runner.temp variable, which we need.
-      - name: Populate binary env
-        shell: bash
-        run: |
-          echo "BINARY_ENV_FILE=${RUNNER_TEMP}/env" >> "${GITHUB_ENV}"
-          echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}"
-          echo "WIN_PACKAGE_WORK_DIR=${RUNNER_TEMP}"
-{%- endmacro %}
-
-on:
-  push:
-    # NOTE: Meta Employees can trigger new nightlies using: https://fburl.com/trigger_pytorch_nightly_build
-    branches:
-      - nightly
-    tags:
-      # NOTE: Binary build pipelines should only get triggered on release candidate builds
-      # Release candidate tags look like: v1.11.0-rc1
-      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
-{%- for label in ciflow_config.labels | sort %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-  workflow_dispatch:
-
-env:
-  # Needed for conda builds
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  ANACONDA_USER: pytorch
-  AWS_DEFAULT_REGION: us-east-1
-  BUILD_ENVIRONMENT: !{{ build_environment }}
-  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  IN_CI: 1
-  IS_GHA: 1
-  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_RETRY_TEST_CASES: 1
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  SKIP_ALL_TESTS: 1
-!{{ common.concurrency(build_environment) }}
-
-jobs:
-{%- for config in build_configs %}
-  !{{ config["build_name"] }}-build:
-    runs-on: windows.4xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_windows() }}
-      !{{ set_runner_specific_vars() }}
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: ${{ env.PYTORCH_ROOT }}
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: ${{ env.BUILDER_ROOT }}
-          ref: release/1.11
-      - name: Populate binary env
-        shell: bash
-        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
-      - name: Build PyTorch binary
-        shell: bash
-        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_build.sh"
-      - uses: !{{ common.upload_artifact_s3_action }}
-        if: always()
-        with:
-          name: !{{ config["build_name"] }}
-          retention-days: 14
-          if-no-files-found: error
-          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
-      !{{ common.wait_and_kill_ssh_windows('pytorch') }}
-  !{{ config["build_name"] }}-test:  # Testing
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs: !{{ config["build_name"] }}-build
-{%- if config["gpu_arch_type"] == "cuda" %}
-    runs-on: windows.8xlarge.nvidia.gpu
-{%- else %}
-    runs-on: windows.4xlarge
-{%- endif %}
-    timeout-minutes: !{{ common.timeout_minutes }}
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_windows() }}
-      !{{ set_runner_specific_vars() }}
-      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
-        name: Download Build Artifacts
-        with:
-          name: !{{ config["build_name"] }}
-          path: "${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: ${{ env.PYTORCH_ROOT }}
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: ${{ env.BUILDER_ROOT }}
-          ref: release/1.11
-      - name: Populate binary env
-        shell: bash
-        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh"
-      - name: Test PyTorch binary
-        shell: bash
-        run: |
-          "${PYTORCH_ROOT}/.circleci/scripts/binary_windows_test.sh"
-      !{{ common.wait_and_kill_ssh_windows('pytorch') }}
-  !{{ config["build_name"] }}-upload:  # Uploading
-    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs: !{{ config["build_name"] }}-test
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
-        name: Download Build Artifacts
-        with:
-          name: !{{ config["build_name"] }}
-          path: "${{ runner.temp }}/artifacts/"
-      - name: Set DRY_RUN (only for tagged pushes)
-        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
-        run: |
-          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
-      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
-        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
-        run: |
-          # reference ends with an RC suffix
-          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
-            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
-          fi
-      - name: Upload binaries
-        env:
-          PKG_DIR: "${{ runner.temp }}/artifacts"
-          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
-          # When running these on pull_request events these should be blank
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
-          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
-        run: |
-          docker run --rm -i \
-            -e ANACONDA_API_TOKEN \
-            -e AWS_ACCESS_KEY_ID \
-            -e AWS_SECRET_ACCESS_KEY \
-            -e DRY_RUN \
-            -e PACKAGE_TYPE \
-            -e PKG_DIR=/artifacts \
-            -e UPLOAD_CHANNEL \
-            -e UPLOAD_SUBFOLDER \
-            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
-            -v "${GITHUB_WORKSPACE}:/v" \
-            -w /v \
-            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
-            bash -c '.circleci/scripts/binary_upload.sh'
-      !{{ common.teardown_ec2_linux() }}
-{%- endfor %}
--- a/.github/templates/windows_ci_workflow.yml.j2
+++ b/.github/templates/windows_ci_workflow.yml.j2
@ -19,38 +19,34 @@
 name: !{{ build_environment }}

 on:
-{%- if is_default %}
+{%- if on_pull_request %}
  pull_request:
+  {%- if ciflow_config.enabled %}
+    {%- if ciflow_config.trigger_action_only %}
+    types: [!{{ ciflow_config.trigger_action }}]
+    {%- else %}
+    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
+    {%- endif %}
+  {%- endif %}
 {%- endif %}
+{%- if is_scheduled %}
+  schedule:
+    - cron: !{{ is_scheduled }}
+{%- else %}
  push:
-{%- for label in ciflow_config.labels | sort %}
-  {%- if loop.first %}
-    tags:
-  {%- endif %}
-  {%- if label != "ciflow/default" %}
-      - '!{{ label }}/*'
-  {%- endif %}
-{%- endfor %}
-{%- if not is_scheduled %}
    branches:
      - master
      - release/*
-{%- else %}
-  schedule:
-    - cron: !{{ is_scheduled }}
 {%- endif %}
  workflow_dispatch:

 env:
  BUILD_ENVIRONMENT: !{{ build_environment }}
  BUILD_WHEEL: 1
-  MAX_JOBS: 8
  CUDA_VERSION: "!{{ cuda_version }}"
  IN_CI: 1
-  IS_GHA: 1
  INSTALL_WINDOWS_SDK: 1
  PYTHON_VERSION: "3.8"
-  PYTORCH_RETRY_TEST_CASES: 1
  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
  SCCACHE_BUCKET: "ossci-compiler-cache"
  VC_PRODUCT: "BuildTools"
@ -59,35 +55,46 @@ env:
  VC_YEAR: "2019"
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  no_proxy: !{{ common.squid_no_proxy }}
-  AWS_DEFAULT_REGION: us-east-1
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-{%- if build_with_debug %}
-  DEBUG: 1
-{%- endif %}
 {%- if cuda_version != "cpu" %}
  TORCH_CUDA_ARCH_LIST: "7.0"
+  USE_CUDA: 1
 {%- endif %}
-  USE_CUDA: !{{ 1 if cuda_version != "cpu" else 0 }}

 !{{ common.concurrency(build_environment) }}

 jobs:
+{%- if ciflow_config.enabled %}
+  !{{ ciflow_config.root_job_name }}:
+    runs-on: ubuntu-18.04
+    if: ${{ !{{ ciflow_config.root_job_condition }} }}
+    steps:
+      - name: noop
+        run: echo running !{{ ciflow_config.root_job_name }}
+{%- endif %}
  build:
    runs-on: "windows.4xlarge"
-    timeout-minutes: !{{ common.timeout_minutes }}
+    defaults:
+      run:
+        working-directory: pytorch-${{ github.run_id }}
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
    env:
      JOB_BASE_NAME: !{{ build_environment }}-build
      http_proxy: "!{{ common. squid_proxy }}"
      https_proxy: "!{{ common.squid_proxy }}"
    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      !{{ common.checkout() }}
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          submodules: recursive
+          path: pytorch-${{ github.run_id }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
      !{{ common.display_ec2_information() }}
      - name: Install Visual Studio 2019 toolchain
        shell: powershell
@ -103,23 +110,32 @@ jobs:
        run: |
          .circleci/scripts/windows_cudnn_install.sh
 {%- endif %}
-      !{{ common.parse_ref() }}
      - name: Build
        shell: bash
        env:
          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        run: |
          .jenkins/pytorch/win-build.sh
      # Upload to github so that people can click and download artifacts
+      - name: Upload artifacts to Github
+        if: always()
+        uses: actions/upload-artifact@v2
+        # Don't fail on upload to GH since it's only for user convenience
+        continue-on-error: true
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          name: ${{ env.BUILD_ENVIRONMENT }}
+          path: C:\${{ github.run_id }}\build-results
      - name: Upload artifacts to s3
+        if: always()
        uses: !{{ common.upload_artifact_s3_action }}
        with:
          retention-days: 14
          if-no-files-found: error
          name: ${{ env.BUILD_ENVIRONMENT }}
          path: C:\${{ github.run_id }}\build-results
-      !{{ common.wait_and_kill_ssh_windows() }}
+      !{{ wait_and_kill_ssh() }}
      - name: Cleanup build-results and workspaces
        if: always()
        shell: bash
@ -131,17 +147,15 @@ jobs:
          rm -rf ./*

  generate-test-matrix:
-    needs: build
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
    env:
      TEST_RUNNER_TYPE: !{{ test_runner_type }}
      NUM_TEST_SHARDS: !{{ num_test_shards }}
      NUM_TEST_SHARDS_ON_PULL_REQUEST: !{{ num_test_shards_on_pull_request }}
      PR_BODY: ${{ github.event.pull_request.body }}
-      NOGPU_RUNNER_TYPE: windows.4xlarge
-      ENABLE_FORCE_ON_CPU_TEST: !{{ enable_force_on_cpu_test }}
-      RUN_SMOKE_TESTS_ONLY_ON_PR: !{{ only_run_smoke_tests_on_pull_request }}
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
@ -158,7 +172,9 @@ jobs:
        run: .github/scripts/generate_pytorch_test_matrix.py

  test:
-    timeout-minutes: !{{ common.timeout_minutes }}
+{%- if only_build_on_pull_request %}
+    if: ${{ github.event_name == 'push' }}
+{%- endif %}
    env:
      JOB_BASE_NAME: !{{ build_environment }}-test
      SHARD_NUMBER: ${{ matrix.shard }}
@ -166,31 +182,40 @@ jobs:
      TEST_CONFIG: ${{ matrix.config }}
      http_proxy: "!{{ common.squid_proxy }}"
      https_proxy: "!{{ common.squid_proxy }}"
+      RUN_SMOKE_TESTS_ONLY_ON_PR: !{{ only_run_smoke_tests_on_pull_request }}
      PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
-    needs: [build, generate-test-matrix]
+      CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
+    needs: [build, generate-test-matrix, !{{ ciflow_config.root_job_name }}]
    strategy:
      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
+    defaults:
+      run:
+        working-directory: pytorch-${{ github.run_id }}
    steps:
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          submodules: recursive
+          path: pytorch-${{ github.run_id }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
      !{{ common.display_ec2_information() }}
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      !{{ common.checkout() }}
      - name: Install Visual Studio 2019 toolchain
        shell: powershell
        run: |
          .\.circleci\scripts\vs_install.ps1
 {%- if cuda_version != "cpu" %}
      - name: Install Cuda
-        if: ${{ matrix.config != 'force_on_cpu' }}
        shell: bash
        run: |
          .circleci/scripts/windows_cuda_install.sh
      - name: Install Cudnn
-        if: ${{ matrix.config != 'force_on_cpu' }}
        shell: bash
        run: |
          .circleci/scripts/windows_cudnn_install.sh
@ -213,14 +238,17 @@ jobs:
        shell: bash
        env:
          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
-        # Time out the test phase after 3.5 hours
-        timeout-minutes: 210
        run: |
+            if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
+              export SHARD_NUMBER=0
+            fi
+            if [[ -n $GITHUB_HEAD_REF && "$RUN_SMOKE_TESTS_ONLY_ON_PR" == "true" ]]; then
+              export RUN_SMOKE_TESTS_ONLY=1
+            fi
            .jenkins/pytorch/win-test.sh
-      !{{ common.upload_downloaded_files(name='windows') }}
      !{{ common.upload_test_reports(name='windows') }}
      !{{ common.render_test_results() }}
-      !{{ common.wait_and_kill_ssh_windows() }}
+      !{{ wait_and_kill_ssh() }}
      !{{ common.parse_ref() }}
      !{{ common.upload_test_statistics(build_environment) }}
      - name: Cleanup workspace
--- a/Show More
+++ b/Show More