fix formatting CIRCLE_TAG when building docs (#67026) (#69876)

Summary: Similar to pytorch/text#1416 malfet, brianjo. The previous code failed when tags changed from `v0.9.0` to `v0.10.0`. I tested this offline; it would be nice to actually tag the repo and see that this adds the correct documentation directory to the pytorch/pytorch.github.io repo. Pull Request resolved: https://github.com/pytorch/pytorch/pull/67026 Reviewed By: saketh-are Differential Revision: D31843381 Pulled By: malfet fbshipit-source-id: 21526ad9ed4c1751c2d7f6d621da305f166a7f55 Co-authored-by: mattip <matti.picus@gmail.com>
[release/1.10] Remove fgrad_input from slow_conv2d (#64280) (#69622)
2025-10-27 00:54:52 +08:00 · 2021-12-14 09:24:18 -08:00 · 2021-12-10 11:42:03 -08:00 · 2021-12-10 11:41:40 -08:00 · 2021-12-09 08:59:45 -08:00 · 2021-12-09 07:09:08 -08:00
4217 changed files with 124381 additions and 361392 deletions
--- a/.azure_pipelines/job_templates/prepare-build-template.yml
+++ b/.azure_pipelines/job_templates/prepare-build-template.yml
@ -46,7 +46,7 @@ steps:
      curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
      curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
      copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
-      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output .\tmp_bin\randomtemp.exe
+      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
    displayName: Install sccache and randomtemp
    condition: not(eq(variables.CUDA_VERSION, ''))

--- a/.azure_pipelines/job_templates/set-environment-variables.yml
+++ b/.azure_pipelines/job_templates/set-environment-variables.yml
@ -120,7 +120,9 @@ steps:
        Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
        Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
        Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
-        Write-Host "##vso[task.setvariable variable=CMAKE_CUDA_COMPILER_LAUNCHER;]$(Build.SourcesDirectory)/tmp_bin/randomtemp.exe;$(Build.SourcesDirectory)/tmp_bin/sccache.exe"
+        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
+        Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
+        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
      displayName: Set MKL, sccache and randomtemp environment variables

    # View current environment variables
--- a/.bazelrc
+++ b/.bazelrc
@ -1,7 +1,6 @@
 build --copt=--std=c++14
 build --copt=-I.
 build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
-build --experimental_ui_max_stdouterr_bytes=2048576

 # Configuration to disable tty features for environments like CI
 build:no-tty --curses no
@ -12,8 +11,3 @@ build:no-tty --show_progress_rate_limit 10
 build:gpu --define=cuda=true
 # define a separate build folder for faster switching between configs
 build:gpu --platform_suffix=-gpu
-# rules_cuda configuration
-build:gpu --@rules_cuda//cuda:enable_cuda
-build:gpu --@rules_cuda//cuda:cuda_targets=sm_52
-build:gpu --@rules_cuda//cuda:compiler=nvcc
-build:gpu --repo_env=CUDA_PATH=/usr/local/cuda
--- a/.circleci/cimodel/data/binary_build_data.py
+++ b/.circleci/cimodel/data/binary_build_data.py
@ -30,7 +30,21 @@ def get_processor_arch_name(gpu_version):
        "cu" + gpu_version.strip("cuda") if gpu_version.startswith("cuda") else gpu_version
    )

+LINUX_PACKAGE_VARIANTS = OrderedDict(
+    manywheel=[
+        "3.6m",
+        "3.7m",
+        "3.8m",
+        "3.9m"
+    ],
+    conda=dimensions.STANDARD_PYTHON_VERSIONS,
+    libtorch=[
+        "3.7m",
+    ],
+)
+
 CONFIG_TREE_DATA = OrderedDict(
+    linux=(dimensions.GPU_VERSIONS, LINUX_PACKAGE_VARIANTS),
    macos=([None], OrderedDict(
        wheel=dimensions.STANDARD_PYTHON_VERSIONS,
        conda=dimensions.STANDARD_PYTHON_VERSIONS,
@ -49,8 +63,7 @@ CONFIG_TREE_DATA = OrderedDict(
        ],
    )),
    windows=(
-        # Stop building Win+CU102, see https://github.com/pytorch/pytorch/issues/65648
-        [v for v in dimensions.GPU_VERSIONS if v not in dimensions.ROCM_VERSION_LABELS and v != "cuda102"],
+        [v for v in dimensions.GPU_VERSIONS if v not in dimensions.ROCM_VERSION_LABELS],
        OrderedDict(
            wheel=dimensions.STANDARD_PYTHON_VERSIONS,
            conda=dimensions.STANDARD_PYTHON_VERSIONS,
--- a/.circleci/cimodel/data/dimensions.py
+++ b/.circleci/cimodel/data/dimensions.py
@ -4,12 +4,12 @@ CUDA_VERSIONS = [
    "102",
    "111",
    "113",
-    "115",
 ]

 ROCM_VERSIONS = [
-    "4.3.1",
-    "4.5.2",
+    "4.0.1",
+    "4.1",
+    "4.2",
 ]

 ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
@ -17,8 +17,8 @@ ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
 GPU_VERSIONS = [None] + ["cuda" + v for v in CUDA_VERSIONS] + ROCM_VERSION_LABELS

 STANDARD_PYTHON_VERSIONS = [
+    "3.6",
    "3.7",
    "3.8",
-    "3.9",
-    "3.10"
+    "3.9"
 ]
--- a/.circleci/cimodel/data/pytorch_build_data.py
+++ b/.circleci/cimodel/data/pytorch_build_data.py
@ -1,7 +1,70 @@
-from cimodel.lib.conf_tree import ConfigNode
+from cimodel.lib.conf_tree import ConfigNode, X, XImportant


 CONFIG_TREE_DATA = [
+    ("xenial", [
+        ("gcc", [
+            ("5.4", [  # All this subtree rebases to master and then build
+                ("3.6", [
+                    ("important", [X(True)]),
+                ]),
+            ]),
+            # TODO: bring back libtorch test
+            ("7", [X("3.6")]),
+        ]),
+        ("clang", [
+            ("7", [
+                ("3.6", [
+                    ("asan", [
+                        (True, [
+                            ("shard_test", [XImportant(True)]),
+                        ]),
+                    ]),
+                    ("onnx", [XImportant(True)]),
+                ]),
+            ]),
+        ]),
+        ("cuda", [
+            ("10.2", [
+                ("3.6", [
+                    # Build are needed for slow_gradcheck
+                    ('build_only', [X(True)]),
+                    ("slow_gradcheck", [
+                        # If you update this slow gradcheck, you should
+                        # also update docker_definitions.py to make sure
+                        # the docker image match the config used here
+                        (True, [
+                            ('shard_test', [XImportant(True)]),
+                        ]),
+                    ]),
+                    # UNCOMMENT THE BELOW TO REENABLE LIBTORCH
+                    # ("libtorch", [
+                    #     (True, [
+                    #         ('build_only', [X(True)]),
+                    #     ]),
+                    # ]),
+                ]),
+            ]),
+        ]),
+    ]),
+    ("bionic", [
+        ("clang", [
+            ("9", [
+                ("3.6", [
+                    ("xla", [XImportant(True)]),
+                    ("vulkan", [XImportant(True)]),
+                ]),
+            ]),
+        ]),
+        # @jithunnair-amd believes Jenkins builds are sufficient
+        # ("rocm", [
+        #     ("3.9", [
+        #         ("3.6", [
+        #             ('build_only', [XImportant(True)]),
+        #         ]),
+        #     ]),
+        # ]),
+    ]),
 ]


@ -82,6 +145,7 @@ class ExperimentalFeatureConfigNode(TreeConfigNode):
            "build_only": BuildOnlyConfigNode,
            "shard_test": ShardTestConfigNode,
            "cuda_gcc_override": CudaGccOverrideConfigNode,
+            "coverage": CoverageConfigNode,
            "pure_torch": PureTorchConfigNode,
            "slow_gradcheck": SlowGradcheckConfigNode,
        }
@ -225,6 +289,14 @@ class ShardTestConfigNode(TreeConfigNode):
        return ImportantConfigNode


+class CoverageConfigNode(TreeConfigNode):
+    def init2(self, node_name):
+        self.props["is_coverage"] = node_name
+
+    def child_constructor(self):
+        return ExperimentalFeatureConfigNode
+
+
 class ImportantConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "IMPORTANT=" + str(label)
--- a/.circleci/cimodel/data/pytorch_build_definitions.py
+++ b/.circleci/cimodel/data/pytorch_build_definitions.py
@ -239,6 +239,7 @@ def instantiate_configs(only_slow_gradcheck):
        compiler_version = fc.find_prop("compiler_version")
        is_xla = fc.find_prop("is_xla") or False
        is_asan = fc.find_prop("is_asan") or False
+        is_coverage = fc.find_prop("is_coverage") or False
        is_noarch = fc.find_prop("is_noarch") or False
        is_onnx = fc.find_prop("is_onnx") or False
        is_pure_torch = fc.find_prop("is_pure_torch") or False
@ -283,6 +284,10 @@ def instantiate_configs(only_slow_gradcheck):
            python_version = fc.find_prop("pyver")
            parms_list[0] = fc.find_prop("abbreviated_pyver")

+        if is_coverage:
+            parms_list_ignored_for_docker_image.append("coverage")
+            python_version = fc.find_prop("pyver")
+
        if is_noarch:
            parms_list_ignored_for_docker_image.append("noarch")

@ -352,6 +357,28 @@ def instantiate_configs(only_slow_gradcheck):
                                        tags_list=RC_PATTERN)
            c.dependent_tests = gen_docs_configs(c)

+        if (
+            compiler_name != "clang"
+            and not rocm_version
+            and not is_libtorch
+            and not is_vulkan
+            and not is_pure_torch
+            and not is_noarch
+            and not is_slow_gradcheck
+            and not only_slow_gradcheck
+            and not build_only
+        ):
+            distributed_test = Conf(
+                c.gen_build_name("") + "distributed",
+                [],
+                is_xla=False,
+                restrict_phases=["test"],
+                is_libtorch=False,
+                is_important=True,
+                parent_build=c,
+            )
+            c.dependent_tests.append(distributed_test)
+
        config_list.append(c)

    return config_list
--- a/.circleci/cimodel/data/simple/android_definitions.py
+++ b/.circleci/cimodel/data/simple/android_definitions.py
@ -0,0 +1,119 @@
+import cimodel.data.simple.util.branch_filters as branch_filters
+from cimodel.data.simple.util.docker_constants import (
+    DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK
+)
+import cimodel.lib.miniutils as miniutils
+
+
+class AndroidJob:
+    def __init__(self,
+                 variant,
+                 template_name,
+                 is_master_only=True):
+
+        self.variant = variant
+        self.template_name = template_name
+        self.is_master_only = is_master_only
+
+    def gen_tree(self):
+
+        base_name_parts = [
+            "pytorch",
+            "linux",
+            "xenial",
+            "py3",
+            "clang5",
+            "android",
+            "ndk",
+            "r19c",
+        ] + self.variant + [
+            "build",
+        ]
+
+        full_job_name = "_".join(base_name_parts)
+        build_env_name = "-".join(base_name_parts)
+
+        props_dict = {
+            "name": full_job_name,
+            "build_environment": "\"{}\"".format(build_env_name),
+            "docker_image": "\"{}\"".format(DOCKER_IMAGE_NDK),
+            "requires": [DOCKER_REQUIREMENT_NDK]
+        }
+
+        if self.is_master_only:
+            props_dict["filters"] = branch_filters.gen_filter_dict(branch_filters.NON_PR_BRANCH_LIST)
+
+        return [{self.template_name: props_dict}]
+
+
+class AndroidGradleJob:
+    def __init__(self,
+                 job_name,
+                 template_name,
+                 dependencies,
+                 is_master_only=True,
+                 is_pr_only=False,
+                 extra_props=tuple()):
+
+        self.job_name = job_name
+        self.template_name = template_name
+        self.dependencies = dependencies
+        self.is_master_only = is_master_only
+        self.is_pr_only = is_pr_only
+        self.extra_props = dict(extra_props)
+
+    def gen_tree(self):
+
+        props_dict = {
+            "name": self.job_name,
+            "requires": self.dependencies,
+        }
+
+        if self.is_master_only:
+            props_dict["filters"] = branch_filters.gen_filter_dict(branch_filters.NON_PR_BRANCH_LIST)
+        elif self.is_pr_only:
+            props_dict["filters"] = branch_filters.gen_filter_dict(branch_filters.PR_BRANCH_LIST)
+        if self.extra_props:
+            props_dict.update(self.extra_props)
+
+        return [{self.template_name: props_dict}]
+
+
+WORKFLOW_DATA = [
+    AndroidJob(["x86_32"], "pytorch_linux_build", is_master_only=False),
+    AndroidJob(["x86_64"], "pytorch_linux_build"),
+    AndroidJob(["arm", "v7a"], "pytorch_linux_build"),
+    AndroidJob(["arm", "v8a"], "pytorch_linux_build"),
+    AndroidGradleJob(
+        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-x86_32",
+        "pytorch_android_gradle_build-x86_32",
+        ["pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_32_build"],
+        is_master_only=False,
+        is_pr_only=True),
+    AndroidGradleJob(
+        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
+        "pytorch_android_gradle_custom_build_single",
+        [DOCKER_REQUIREMENT_NDK],
+        is_master_only=False,
+        is_pr_only=True),
+    AndroidGradleJob(
+        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
+        "pytorch_android_gradle_custom_build_single",
+        [DOCKER_REQUIREMENT_NDK],
+        is_master_only=False,
+        is_pr_only=True,
+        extra_props=tuple({
+            "lite_interpreter": miniutils.quote(str(int(False)))
+        }.items())),
+    AndroidGradleJob(
+        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build",
+        "pytorch_android_gradle_build",
+        ["pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_32_build",
+         "pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_64_build",
+         "pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v7a_build",
+         "pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v8a_build"]),
+]
+
+
+def get_workflow_jobs():
+    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/binary_smoketest.py
+++ b/.circleci/cimodel/data/simple/binary_smoketest.py
@ -120,9 +120,9 @@ WORKFLOW_DATA = [
    ),
    SmoketestJob(
        "binary_windows_build",
-        ["wheel", "3.7", "cu113"],
+        ["wheel", "3.7", "cu102"],
        None,
-        "binary_windows_wheel_3_7_cu113_build",
+        "binary_windows_wheel_3_7_cu102_build",
        is_master_only=True,
    ),

@ -144,11 +144,11 @@ WORKFLOW_DATA = [
    ),
    SmoketestJob(
        "binary_windows_test",
-        ["wheel", "3.7", "cu113"],
+        ["wheel", "3.7", "cu102"],
        None,
-        "binary_windows_wheel_3_7_cu113_test",
+        "binary_windows_wheel_3_7_cu102_test",
        is_master_only=True,
-        requires=["binary_windows_wheel_3_7_cu113_build"],
+        requires=["binary_windows_wheel_3_7_cu102_build"],
        extra_props={
            "executor": "windows-with-nvidia-gpu",
        },
--- a/.circleci/cimodel/data/simple/docker_definitions.py
+++ b/.circleci/cimodel/data/simple/docker_definitions.py
@ -4,8 +4,27 @@ from cimodel.lib.miniutils import quote
 from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN


-# NOTE: All hardcoded docker image builds have been migrated to GHA
+# TODO: make this generated from a matrix rather than just a static list
 IMAGE_NAMES = [
+    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
+    "pytorch-linux-bionic-py3.6-clang9",
+    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
+    "pytorch-linux-bionic-py3.8-gcc9",
+    "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
+    "pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
+    "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
+    "pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
+    "pytorch-linux-xenial-py3-clang5-asan",
+    "pytorch-linux-xenial-py3-clang7-asan",
+    "pytorch-linux-xenial-py3-clang7-onnx",
+    "pytorch-linux-xenial-py3.8",
+    "pytorch-linux-xenial-py3.6-clang7",
+    "pytorch-linux-xenial-py3.6-gcc5.4",  # this one is used in doc builds
+    "pytorch-linux-xenial-py3.6-gcc7.2",
+    "pytorch-linux-xenial-py3.6-gcc7",
+    "pytorch-linux-bionic-rocm4.1-py3.6",
+    "pytorch-linux-bionic-rocm4.2-py3.6",
+    "pytorch-linux-bionic-rocm4.3.1-py3.6",
 ]

 # This entry should be an element from the list above
@ -13,12 +32,10 @@ IMAGE_NAMES = [
 # pytorch_build_data.py
 SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"

-def get_workflow_jobs(images=IMAGE_NAMES, only_slow_gradcheck=False):
+def get_workflow_jobs(only_slow_gradcheck=False):
    """Generates a list of docker image build definitions"""
    ret = []
-    for image_name in images:
-        if image_name.startswith('docker-'):
-            image_name = image_name.lstrip('docker-')
+    for image_name in IMAGE_NAMES:
        if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
            continue

--- a/.circleci/cimodel/data/simple/ios_definitions.py
+++ b/.circleci/cimodel/data/simple/ios_definitions.py
@ -75,12 +75,6 @@ WORKFLOW_DATA = [
    IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom"), extra_props={
        "op_list": "mobilenetv2.yaml",
        "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("x86_64", "coreml"), is_org_member_context=False, extra_props={
-        "use_coreml": miniutils.quote(str(int(True))),
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("arm64", "coreml"), extra_props={
-        "use_coreml": miniutils.quote(str(int(True))),
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
 ]


--- a/.circleci/cimodel/data/simple/mobile_definitions.py
+++ b/.circleci/cimodel/data/simple/mobile_definitions.py
@ -4,6 +4,12 @@ PyTorch Mobile PR builds (use linux host toolchain + mobile build options)

 import cimodel.lib.miniutils as miniutils
 import cimodel.data.simple.util.branch_filters
+from cimodel.data.simple.util.docker_constants import (
+    DOCKER_IMAGE_ASAN,
+    DOCKER_REQUIREMENT_ASAN,
+    DOCKER_IMAGE_NDK,
+    DOCKER_REQUIREMENT_NDK
+)


 class MobileJob:
@ -46,6 +52,33 @@ class MobileJob:


 WORKFLOW_DATA = [
+    MobileJob(
+        DOCKER_IMAGE_ASAN,
+        [DOCKER_REQUIREMENT_ASAN],
+        ["build"]
+    ),
+
+    # Use LLVM-DEV toolchain in android-ndk-r19c docker image
+    MobileJob(
+        DOCKER_IMAGE_NDK,
+        [DOCKER_REQUIREMENT_NDK],
+        ["custom", "build", "dynamic"]
+    ),
+
+    MobileJob(
+        DOCKER_IMAGE_NDK,
+        [DOCKER_REQUIREMENT_NDK],
+        ["custom", "build", "static"]
+    ),
+
+    # Use LLVM-DEV toolchain in android-ndk-r19c docker image
+    # Most of this CI is already covered by "mobile-custom-build-dynamic" job
+    MobileJob(
+        DOCKER_IMAGE_NDK,
+        [DOCKER_REQUIREMENT_NDK],
+        ["code", "analysis"],
+        True
+    ),
 ]


--- a/.circleci/cimodel/data/simple/nightly_android.py
+++ b/.circleci/cimodel/data/simple/nightly_android.py
@ -0,0 +1,77 @@
+from cimodel.data.simple.util.docker_constants import (
+    DOCKER_IMAGE_NDK,
+    DOCKER_REQUIREMENT_NDK
+)
+
+
+class AndroidNightlyJob:
+    def __init__(self,
+                 variant,
+                 template_name,
+                 extra_props=None,
+                 with_docker=True,
+                 requires=None,
+                 no_build_suffix=False):
+
+        self.variant = variant
+        self.template_name = template_name
+        self.extra_props = extra_props or {}
+        self.with_docker = with_docker
+        self.requires = requires
+        self.no_build_suffix = no_build_suffix
+
+    def gen_tree(self):
+
+        base_name_parts = [
+            "pytorch",
+            "linux",
+            "xenial",
+            "py3",
+            "clang5",
+            "android",
+            "ndk",
+            "r19c",
+        ] + self.variant
+
+        build_suffix = [] if self.no_build_suffix else ["build"]
+        full_job_name = "_".join(["nightly"] + base_name_parts + build_suffix)
+        build_env_name = "-".join(base_name_parts)
+
+        props_dict = {
+            "name": full_job_name,
+            "requires": self.requires,
+            "filters": {"branches": {"only": "nightly"}},
+        }
+
+        props_dict.update(self.extra_props)
+
+        if self.with_docker:
+            props_dict["docker_image"] = DOCKER_IMAGE_NDK
+            props_dict["build_environment"] = build_env_name
+
+        return [{self.template_name: props_dict}]
+
+BASE_REQUIRES = [DOCKER_REQUIREMENT_NDK]
+
+WORKFLOW_DATA = [
+    AndroidNightlyJob(["x86_32"], "pytorch_linux_build", requires=BASE_REQUIRES),
+    AndroidNightlyJob(["x86_64"], "pytorch_linux_build", requires=BASE_REQUIRES),
+    AndroidNightlyJob(["arm", "v7a"], "pytorch_linux_build", requires=BASE_REQUIRES),
+    AndroidNightlyJob(["arm", "v8a"], "pytorch_linux_build", requires=BASE_REQUIRES),
+    AndroidNightlyJob(["android_gradle"], "pytorch_android_gradle_build",
+                      with_docker=False,
+                      requires=[
+                          "nightly_pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_32_build",
+                          "nightly_pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_64_build",
+                          "nightly_pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v7a_build",
+                          "nightly_pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v8a_build"]),
+    AndroidNightlyJob(["x86_32_android_publish_snapshot"], "pytorch_android_publish_snapshot",
+                      extra_props={"context": "org-member"},
+                      with_docker=False,
+                      requires=["nightly_pytorch_linux_xenial_py3_clang5_android_ndk_r19c_android_gradle_build"],
+                      no_build_suffix=True),
+]
+
+
+def get_workflow_jobs():
+    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/nightly_ios.py
+++ b/.circleci/cimodel/data/simple/nightly_ios.py
@ -5,11 +5,9 @@ import cimodel.lib.miniutils as miniutils
 class IOSNightlyJob:
    def __init__(self,
                 variant,
-                 is_full_jit=False,
                 is_upload=False):

        self.variant = variant
-        self.is_full_jit = is_full_jit
        self.is_upload = is_upload

    def get_phase_name(self):
@ -19,11 +17,8 @@ class IOSNightlyJob:

        extra_name_suffix = [self.get_phase_name()] if self.is_upload else []

-        extra_name = ["full_jit"] if self.is_full_jit else []
-
        common_name_pieces = [
            "ios",
-        ] + extra_name + [
        ] + ios_definitions.XCODE_VERSION.render_dots_or_parts(with_version_dots) + [
            "nightly",
            self.variant,
@ -36,8 +31,7 @@ class IOSNightlyJob:
        return "_".join(["pytorch"] + self.get_common_name_pieces(False))

    def gen_tree(self):
-        build_configs = BUILD_CONFIGS_FULL_JIT if self.is_full_jit else BUILD_CONFIGS
-        extra_requires = [x.gen_job_name() for x in build_configs] if self.is_upload else []
+        extra_requires = [x.gen_job_name() for x in BUILD_CONFIGS] if self.is_upload else []

        props_dict = {
            "build_environment": "-".join(["libtorch"] + self.get_common_name_pieces(True)),
@ -53,9 +47,6 @@ class IOSNightlyJob:
            props_dict["use_metal"] = miniutils.quote(str(int(True)))
            props_dict["use_coreml"] = miniutils.quote(str(int(True)))

-        if self.is_full_jit:
-            props_dict["lite_interpreter"] = miniutils.quote(str(int(False)))
-
        template_name = "_".join([
            "binary",
            "ios",
@ -70,14 +61,9 @@ BUILD_CONFIGS = [
    IOSNightlyJob("arm64"),
 ]

-BUILD_CONFIGS_FULL_JIT = [
-    IOSNightlyJob("x86_64", is_full_jit=True),
-    IOSNightlyJob("arm64", is_full_jit=True),
-]

-WORKFLOW_DATA = BUILD_CONFIGS + BUILD_CONFIGS_FULL_JIT + [
-    IOSNightlyJob("binary", is_full_jit=False, is_upload=True),
-    IOSNightlyJob("binary", is_full_jit=True, is_upload=True),
+WORKFLOW_DATA = BUILD_CONFIGS + [
+    IOSNightlyJob("binary", is_upload=True),
 ]


--- a/.circleci/cimodel/data/simple/util/docker_constants.py
+++ b/.circleci/cimodel/data/simple/util/docker_constants.py
@ -11,7 +11,7 @@ def gen_docker_image_requires(image_name):


 DOCKER_IMAGE_BASIC, DOCKER_REQUIREMENT_BASE = gen_docker_image(
-    "pytorch-linux-xenial-py3.7-gcc5.4"
+    "pytorch-linux-xenial-py3.6-gcc5.4"
 )

 DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
@ -19,7 +19,7 @@ DOCKER_IMAGE_CUDA_10_2, DOCKER_REQUIREMENT_CUDA_10_2 = gen_docker_image(
 )

 DOCKER_IMAGE_GCC7, DOCKER_REQUIREMENT_GCC7 = gen_docker_image(
-    "pytorch-linux-xenial-py3.7-gcc7"
+    "pytorch-linux-xenial-py3.6-gcc7"
 )


--- a/.circleci/cimodel/data/windows_build_definitions.py
+++ b/.circleci/cimodel/data/windows_build_definitions.py
@ -0,0 +1,160 @@
+import cimodel.lib.miniutils as miniutils
+from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN, NON_PR_BRANCH_LIST
+from cimodel.data.simple.util.versions import CudaVersion
+
+
+class WindowsJob:
+    def __init__(
+        self,
+        test_index,
+        vscode_spec,
+        cuda_version,
+        force_on_cpu=False,
+        multi_gpu=False,
+        master_only=False,
+        nightly_only=False,
+        master_and_nightly=False
+    ):
+        self.test_index = test_index
+        self.vscode_spec = vscode_spec
+        self.cuda_version = cuda_version
+        self.force_on_cpu = force_on_cpu
+        self.multi_gpu = multi_gpu
+        self.master_only = master_only
+        self.nightly_only = nightly_only
+        self.master_and_nightly = master_and_nightly
+
+    def gen_tree(self):
+
+        base_phase = "build" if self.test_index is None else "test"
+        numbered_phase = (
+            base_phase if self.test_index is None else base_phase + str(self.test_index)
+        )
+
+        key_parts = ["pytorch", "windows", base_phase]
+        if self.multi_gpu:
+            key_parts.append('multigpu')
+        key_name = "_".join(key_parts)
+
+        cpu_forcing_name_parts = ["on", "cpu"] if self.force_on_cpu else []
+
+        target_arch = self.cuda_version.render_dots() if self.cuda_version else "cpu"
+
+        python_version = "3.8"
+
+        base_name_parts = [
+            "pytorch",
+            "windows",
+            self.vscode_spec.render(),
+            "py" + python_version.replace(".", ""),
+            target_arch,
+        ]
+
+        prerequisite_jobs = []
+        if base_phase == "test":
+            prerequisite_jobs.append("_".join(base_name_parts + ["build"]))
+
+        if self.cuda_version:
+            self.cudnn_version = 8 if self.cuda_version.major == 11 else 7
+
+        arch_env_elements = (
+            ["cuda" + str(self.cuda_version.major) + "." + str(self.cuda_version.minor)]
+            if self.cuda_version
+            else ["cpu"]
+        )
+
+        build_environment_string = "-".join(
+            ["pytorch", "win"]
+            + self.vscode_spec.get_elements()
+            + arch_env_elements
+            + ["py" + python_version.split(".")[0]]
+        )
+
+        is_running_on_cuda = bool(self.cuda_version) and not self.force_on_cpu
+
+        if self.multi_gpu:
+            props_dict = {"requires": prerequisite_jobs}
+        else:
+            props_dict = {
+                "build_environment": build_environment_string,
+                "python_version": miniutils.quote(python_version),
+                "vs_version": miniutils.quote("16.8.6"),
+                "vc_version": miniutils.quote(self.vscode_spec.dotted_version()),
+                "vc_year": miniutils.quote(str(self.vscode_spec.year)),
+                "vc_product": self.vscode_spec.get_product(),
+                "use_cuda": miniutils.quote(str(int(is_running_on_cuda))),
+                "requires": prerequisite_jobs,
+            }
+
+        if self.master_only:
+            props_dict[
+                "filters"
+            ] = gen_filter_dict()
+        elif self.nightly_only:
+            props_dict[
+                "filters"
+            ] = gen_filter_dict(branches_list=["nightly"], tags_list=RC_PATTERN)
+        elif self.master_and_nightly:
+            props_dict[
+                "filters"
+            ] = gen_filter_dict(branches_list=NON_PR_BRANCH_LIST + ["nightly"], tags_list=RC_PATTERN)
+
+        name_parts = base_name_parts + cpu_forcing_name_parts + [numbered_phase]
+
+        if not self.multi_gpu:
+            if base_phase == "test":
+                test_name = "-".join(["pytorch", "windows", numbered_phase])
+                props_dict["test_name"] = test_name
+
+                if is_running_on_cuda:
+                    props_dict["executor"] = "windows-with-nvidia-gpu"
+
+            props_dict["cuda_version"] = (
+                miniutils.quote(str(self.cuda_version))
+                if self.cuda_version
+                else "cpu"
+            )
+
+        props_dict["name"] = "_".join(name_parts)
+
+        return [{key_name: props_dict}]
+
+
+class VcSpec:
+    def __init__(self, year, version_elements=None, hide_version=False):
+        self.year = year
+        self.version_elements = version_elements or []
+        self.hide_version = hide_version
+
+    def get_elements(self):
+        if self.hide_version:
+            return [self.prefixed_year()]
+        return [self.prefixed_year()] + self.version_elements
+
+    def get_product(self):
+        return "BuildTools"
+
+    def dotted_version(self):
+        return ".".join(self.version_elements)
+
+    def prefixed_year(self):
+        return "vs" + str(self.year)
+
+    def render(self):
+        return "_".join(self.get_elements())
+
+_VC2019 = VcSpec(2019)
+
+WORKFLOW_DATA = [
+    # VS2019 CUDA-10.2
+    WindowsJob(None, _VC2019, CudaVersion(10, 2), master_only=True),
+    # VS2019 CUDA-10.2 force on cpu
+    WindowsJob(1, _VC2019, CudaVersion(10, 2), force_on_cpu=True, master_only=True),
+
+    # TODO: This test is disabled due to https://github.com/pytorch/pytorch/issues/59724
+    # WindowsJob('_azure_multi_gpu', _VC2019, CudaVersion(11, 1), multi_gpu=True, master_and_nightly=True),
+]
+
+
+def get_windows_workflows():
+    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
--- a/.circleci/docker/android/build.gradle
+++ b/.circleci/docker/android/build.gradle
@ -51,9 +51,9 @@ android {
 dependencies {
    implementation 'com.android.support:appcompat-v7:28.0.0'
    implementation 'androidx.appcompat:appcompat:1.0.0'
-    implementation 'com.facebook.fbjni:fbjni-java-only:0.2.2'
+    implementation 'com.facebook.fbjni:fbjni-java-only:0.0.3'
    implementation 'com.google.code.findbugs:jsr305:3.0.1'
-    implementation 'com.facebook.soloader:nativeloader:0.10.1'
+    implementation 'com.facebook.soloader:nativeloader:0.8.0'

    implementation 'junit:junit:' + rootProject.junitVersion
    implementation 'androidx.test:core:' + rootProject.coreVersion
--- a/.circleci/docker/build.sh
+++ b/.circleci/docker/build.sh
@ -82,8 +82,8 @@ case "$image" in
    GCC_VERSION=7
    # Do not install PROTOBUF, DB, and VISION as a test
    ;;
-  pytorch-linux-xenial-py3.7-gcc5.4)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-gcc5.4)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=5
    PROTOBUF=yes
@ -91,14 +91,14 @@ case "$image" in
    VISION=yes
    KATEX=yes
    ;;
-  pytorch-linux-xenial-py3.7-gcc7.2)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-gcc7.2)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    # Do not install PROTOBUF, DB, and VISION as a test
    ;;
-  pytorch-linux-xenial-py3.7-gcc7)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-gcc7)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -108,7 +108,7 @@ case "$image" in
  pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7)
    CUDA_VERSION=10.2
    CUDNN_VERSION=7
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -119,7 +119,7 @@ case "$image" in
  pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7)
    CUDA_VERSION=11.1
    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -130,19 +130,7 @@ case "$image" in
  pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7)
    CUDA_VERSION=11.3.0 # Deviating from major.minor to conform to nvidia's Docker image names
    CUDNN_VERSION=8
-    TENSORRT_VERSION=8.0.1.6
-    ANACONDA_PYTHON_VERSION=3.7
-    CMAKE_VERSION=3.10.3
-    GCC_VERSION=7
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    ;;
-  pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7)
-    CUDA_VERSION=11.5.0
-    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
@ -151,15 +139,15 @@ case "$image" in
    KATEX=yes
    ;;
  pytorch-linux-xenial-py3-clang5-asan)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=5.0
-    CMAKE_VERSION=3.13.5
+    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ;;
  pytorch-linux-xenial-py3-clang7-asan)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=7
    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
@ -167,7 +155,7 @@ case "$image" in
    VISION=yes
    ;;
  pytorch-linux-xenial-py3-clang7-onnx)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=7
    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
@ -175,9 +163,9 @@ case "$image" in
    VISION=yes
    ;;
  pytorch-linux-xenial-py3-clang5-android-ndk-r19c)
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=5.0
-    CMAKE_VERSION=3.13.5
+    CMAKE_VERSION=3.10.3
    LLVMDEV=yes
    PROTOBUF=yes
    ANDROID=yes
@ -185,16 +173,16 @@ case "$image" in
    GRADLE_VERSION=6.8.3
    NINJA_VERSION=1.9.0
    ;;
-  pytorch-linux-xenial-py3.7-clang7)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-xenial-py3.6-clang7)
+    ANACONDA_PYTHON_VERSION=3.6
    CMAKE_VERSION=3.10.3
    CLANG_VERSION=7
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ;;
-  pytorch-linux-bionic-py3.7-clang9)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-bionic-py3.6-clang9)
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=9
    PROTOBUF=yes
    DB=yes
@ -209,10 +197,10 @@ case "$image" in
    DB=yes
    VISION=yes
    ;;
-  pytorch-linux-bionic-cuda10.2-cudnn7-py3.7-clang9)
+  pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9)
    CUDA_VERSION=10.2
    CUDNN_VERSION=7
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=9
    PROTOBUF=yes
    DB=yes
@ -227,32 +215,40 @@ case "$image" in
    DB=yes
    VISION=yes
    ;;
-  pytorch-linux-bionic-cuda11.0-cudnn8-py3.7-gcc9)
+  pytorch-linux-bionic-cuda11.0-cudnn8-py3.6-gcc9)
    CUDA_VERSION=11.0
    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.7
+    ANACONDA_PYTHON_VERSION=3.6
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=3.9
    ;;
-  pytorch-linux-bionic-rocm4.3.1-py3.7)
-    ANACONDA_PYTHON_VERSION=3.7
+  pytorch-linux-bionic-rocm4.1-py3.6)
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ROCM_VERSION=4.1
+    ;;
+  pytorch-linux-bionic-rocm4.2-py3.6)
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ROCM_VERSION=4.2
+    ;;
+  pytorch-linux-bionic-rocm4.3.1-py3.6)
+    ANACONDA_PYTHON_VERSION=3.6
    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ROCM_VERSION=4.3.1
    ;;
-  pytorch-linux-bionic-rocm4.5-py3.7)
-    ANACONDA_PYTHON_VERSION=3.7
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    ROCM_VERSION=4.5.2
-    ;;
  *)
    # Catch-all for builds that are not hardcoded.
    PROTOBUF=yes
@ -298,16 +294,6 @@ fi

 tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

-# If we are trying to use nvidia cuda image make sure it exists, otherwise use IMAGE from ghcr.io
-# this logic currently only exists for ubuntu
-if [[ "$image" == *cuda*  && ${OS} == "ubuntu" ]]; then
-  IMAGE_NAME="nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
-  if ! DOCKER_CLI_EXPERIMENTAL=enabled docker manifest inspect "${IMAGE_NAME}" >/dev/null 2>/dev/null; then
-    IMAGE_NAME="ghcr.io/pytorch/nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
-    INSTALL_CUDNN="True"
-  fi
-fi
-
 # Build image
 # TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
 # it's no longer needed.
@ -334,7 +320,6 @@ docker build \
       --build-arg "GCC_VERSION=${GCC_VERSION}" \
       --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
       --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
-       --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
       --build-arg "ANDROID=${ANDROID}" \
       --build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
       --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
@ -344,9 +329,6 @@ docker build \
       --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
       --build-arg "KATEX=${KATEX:-}" \
       --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
-       --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx900;gfx906}" \
-       --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
-       --build-arg "INSTALL_CUDNN=${INSTALL_CUDNN}" \
       -f $(dirname ${DOCKERFILE})/Dockerfile \
       -t "$tmp_tag" \
       "$@" \
@ -365,7 +347,6 @@ function drun() {
 }

 if [[ "$OS" == "ubuntu" ]]; then
-
  if !(drun lsb_release -a 2>&1 | grep -qF Ubuntu); then
    echo "OS=ubuntu, but:"
    drun lsb_release -a
--- a/.circleci/docker/build_docker.sh
+++ b/.circleci/docker/build_docker.sh
@ -26,14 +26,11 @@ login() {
    docker login -u AWS --password-stdin "$1"
 }

+# Retry on timeouts (can happen on job stampede).
+retry login "${registry}"

-# Only run these steps if not on github actions
-if [[ -z "${GITHUB_ACTIONS}" ]]; then
-  # Retry on timeouts (can happen on job stampede).
-  retry login "${registry}"
-  # Logout on exit
-  trap "docker logout ${registry}" EXIT
-fi
+# Logout on exit
+trap "docker logout ${registry}" EXIT

 # export EC2=1
 # export JENKINS=1
@ -48,8 +45,8 @@ fi

 docker push "${image}:${tag}"

+docker save -o "${IMAGE_NAME}:${tag}.tar" "${image}:${tag}"
+
 if [ -z "${DOCKER_SKIP_S3_UPLOAD:-}" ]; then
-  trap "rm -rf ${IMAGE_NAME}:${tag}.tar" EXIT
-  docker save -o "${IMAGE_NAME}:${tag}.tar" "${image}:${tag}"
  aws s3 cp "${IMAGE_NAME}:${tag}.tar" "s3://ossci-linux-build/pytorch/base/${IMAGE_NAME}:${tag}.tar" --acl public-read
 fi
--- a/.circleci/docker/centos-rocm/Dockerfile
+++ b/.circleci/docker/centos-rocm/Dockerfile
@ -4,10 +4,6 @@ FROM centos:${CENTOS_VERSION}

 ARG CENTOS_VERSION

-# Set AMD gpu targets to build for
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-
 # Install required packages to build Caffe2

 # Install common dependencies (so that this step can be cached separately)
@ -15,12 +11,6 @@ ARG EC2
 ADD ./common/install_base.sh install_base.sh
 RUN bash ./install_base.sh && rm install_base.sh

-# Update CentOS git version
-RUN yum -y remove git
-RUN yum -y remove git-*
-RUN yum -y install https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm
-RUN yum install -y git
-
 # Install devtoolset
 ARG DEVTOOLSET_VERSION
 ADD ./common/install_devtoolset.sh install_devtoolset.sh
@ -37,7 +27,7 @@ RUN rm install_glibc.sh
 ADD ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
--- a/.circleci/docker/common/install_base.sh
+++ b/.circleci/docker/common/install_base.sh
@ -11,13 +11,8 @@ install_ubuntu() {
  #   "$UBUNTU_VERSION" == "18.04"
  if [[ "$UBUNTU_VERSION" == "18.04"* ]]; then
    cmake3="cmake=3.10*"
-    maybe_libiomp_dev="libiomp-dev"
-  elif [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
-    cmake3="cmake=3.16*"
-    maybe_libiomp_dev=""
  else
    cmake3="cmake=3.5*"
-    maybe_libiomp_dev="libiomp-dev"
  fi

  # Install common dependencies
@ -38,7 +33,7 @@ install_ubuntu() {
    git \
    libatlas-base-dev \
    libc6-dbg \
-    ${maybe_libiomp_dev} \
+    libiomp-dev \
    libyaml-dev \
    libz-dev \
    libjpeg-dev \
@ -49,10 +44,6 @@ install_ubuntu() {
    wget \
    vim

-  # Should resolve issues related to various apt package repository cert issues
-  # see: https://github.com/pytorch/pytorch/issues/65931
-  apt-get install -y libgnutls30
-
  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
@ -118,11 +109,14 @@ esac
 # Install Valgrind separately since the apt-get version is too old.
 mkdir valgrind_build && cd valgrind_build
 VALGRIND_VERSION=3.16.1
-wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
+if ! wget http://valgrind.org/downloads/valgrind-${VALGRIND_VERSION}.tar.bz2
+then
+  wget https://sourceware.org/ftp/valgrind/valgrind-${VALGRIND_VERSION}.tar.bz2
+fi
 tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
 cd valgrind-${VALGRIND_VERSION}
 ./configure --prefix=/usr/local
-make -j6
+make -j 4
 sudo make install
 cd ../../
 rm -rf valgrind_build
--- a/.circleci/docker/common/install_conda.sh
+++ b/.circleci/docker/common/install_conda.sh
@ -13,12 +13,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
      CONDA_FILE="Miniconda2-latest-Linux-x86_64.sh"
    ;;
    3)
-      if [ "$ANACONDA_PYTHON_VERSION" = "3.6" ]; then
-        # Latest release of Conda that still supports python-3.6
-        CONDA_FILE="Miniconda3-py37_4.10.3-Linux-x86_64.sh"
-      else
-        CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
-      fi
+      CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh"
    ;;
    *)
      echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
@ -61,9 +56,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  pushd /opt/conda

  # Track latest conda update
-  if [ "$ANACONDA_PYTHON_VERSION" != "3.6" ]; then
-    as_jenkins conda update -y -n base conda
-  fi
+  as_jenkins conda update -y -n base conda

  # Install correct Python version
  as_jenkins conda install -y python="$ANACONDA_PYTHON_VERSION"
@ -93,10 +86,14 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
    conda_install numpy=1.18.5 astunparse pyyaml mkl mkl-include setuptools cffi future six dataclasses typing_extensions
  fi

-  # Magma package names are concatenation of CUDA major and minor ignoring revision
-  # I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
-  if [ -n "$CUDA_VERSION" ]; then
-    conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch
+  if [[ "$CUDA_VERSION" == 10.2* ]]; then
+    conda_install magma-cuda102 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.0* ]]; then
+    conda_install magma-cuda110 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.1* ]]; then
+    conda_install magma-cuda111 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.3* ]]; then
+    conda_install magma-cuda113 -c pytorch
  fi

  # TODO: This isn't working atm
@ -106,12 +103,14 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  # TODO: Why is scipy pinned
  # Pin MyPy version because new errors are likely to appear with each release
  # Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
+  # Pin coverage so we can use COVERAGE_RCFILE
  as_jenkins pip install --progress-bar off pytest \
    scipy==$SCIPY_VERSION \
    scikit-image \
    psutil \
    unittest-xml-reporting \
    boto3==1.16.34 \
+    coverage==5.5 \
    hypothesis==4.53.2 \
    expecttest==0.1.3 \
    mypy==0.812 \
@ -120,7 +119,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  # Install numba only on python-3.8 or below
  # For numba issue see https://github.com/pytorch/pytorch/issues/51511
  if [[ $(python -c "import sys; print(int(sys.version_info < (3, 9)))") == "1" ]]; then
-    as_jenkins pip install --progress-bar off numba==0.54.1 librosa>=0.6.2
+    as_jenkins pip install --progress-bar off numba librosa>=0.6.2
  else
    as_jenkins pip install --progress-bar off numba==0.49.0 librosa>=0.6.2
  fi
--- a/.circleci/docker/common/install_cudnn8.sh
+++ b/.circleci/docker/common/install_cudnn8.sh
@ -1,10 +0,0 @@
-#!/bin/bash
-
-sudo apt-get update
-# also install ssh to avoid error of:
-# --------------------------------------------------------------------------
-# The value of the MCA parameter "plm_rsh_agent" was set to a path
-# that could not be found:
-#   plm_rsh_agent: ssh : rsh
-sudo apt-get install -y ssh
-sudo apt-get update && apt-get install -y --no-install-recommends libcudnn8=8.2.0.53-1+cuda11.3 libcudnn8-dev=8.2.0.53-1+cuda11.3 && apt-mark hold libcudnn8
--- a/.circleci/docker/common/install_gcc.sh
+++ b/.circleci/docker/common/install_gcc.sh
@ -7,18 +7,15 @@ if [ -n "$GCC_VERSION" ]; then
  # Need the official toolchain repo to get alternate packages
  add-apt-repository ppa:ubuntu-toolchain-r/test
  apt-get update
-  if [[ "$UBUNTU_VERSION" == "16.04" && "${GCC_VERSION:0:1}" == "5" ]]; then
+  if [ "$UBUNTU_VERSION" = "16.04" -a "$GCC_VERSION" = "5" ]; then
    apt-get install -y g++-5=5.4.0-6ubuntu1~16.04.12
-    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 50
-    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 50
-    update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-5 50
  else
    apt-get install -y g++-$GCC_VERSION
-    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
-    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
-    update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50
  fi

+  update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
+  update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
+  update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50

  # Cleanup package manager
  apt-get autoclean && apt-get clean
--- a/.circleci/docker/common/install_openssl.sh
+++ b/.circleci/docker/common/install_openssl.sh
@ -4,11 +4,11 @@ set -ex

 OPENSSL=openssl-1.1.1k

-wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
+wget -q -O "${OPENSSL}.tar.gz" "https://www.openssl.org/source/${OPENSSL}.tar.gz"
 tar xf "${OPENSSL}.tar.gz"
 cd "${OPENSSL}"
 ./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
-# NOTE: openssl install errors out when built with the -j option
-make -j6; make install_sw
+# NOTE: openssl errors out when built with the -j option
+make install_sw
 cd ..
 rm -rf "${OPENSSL}"
--- a/.circleci/docker/common/install_protobuf.sh
+++ b/.circleci/docker/common/install_protobuf.sh
@ -14,9 +14,9 @@ install_protobuf_317() {

  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz"
  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
-  # -j6 to balance memory usage and speed.
+  # -j2 to balance memory usage and speed.
  # naked `-j` seems to use too much memory.
-  pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
+  pushd "$pb_dir" && ./configure && make -j2 && make -j2 check && sudo make -j2 install && sudo ldconfig
  popd
  rm -rf $pb_dir
 }
--- a/.circleci/docker/common/install_rocm.sh
+++ b/.circleci/docker/common/install_rocm.sh
@ -4,27 +4,22 @@ set -ex

 install_magma() {
    # "install" hipMAGMA into /opt/rocm/magma by copying after build
-    git clone https://bitbucket.org/icl/magma.git
+    git clone https://bitbucket.org/icl/magma.git -b magma_ctrl_launch_bounds
    pushd magma
-    # fix for magma_queue memory leak issue
-    git checkout c62d700d880c7283b33fb1d615d62fc9c7f7ca21
+    # The "magma_ctrl_launch_bounds" branch carries a fix on top of the commit below, so the checkout is kept (commented out) for reference.
+    #git checkout 878b1ce02e9cfe4a829be22c8f911e9c0b6bd88f
+    # Work around non-ASCII characters in certain magma sources; remove this after upstream magma fixes this.
+    perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zfree.cpp
+    perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zsolverinfo.cpp
    cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
    echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
    echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
-    echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
-    export PATH="${PATH}:/opt/rocm/bin"
-    if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
-      amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
-    else
-      amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
-    fi
-    for arch in $amdgpu_targets; do
-      echo "DEVCCFLAGS += --amdgpu-target=$arch" >> make.inc
-    done
+    echo 'DEVCCFLAGS += --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 --gpu-max-threads-per-block=256' >> make.inc
    # hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
    sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
+    export PATH="${PATH}:/opt/rocm/bin"
    make -f make.gen.hipMAGMA -j $(nproc)
-    LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda
+    make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda
    make testing/testing_dgemm -j $(nproc) MKLROOT=/opt/conda
    popd
    mv magma /opt/rocm
@ -34,19 +29,12 @@ ver() {
    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
 }

-# Map ROCm version to AMDGPU version
-declare -A AMDGPU_VERSIONS=( ["4.5.2"]="21.40.2" )
-
 install_ubuntu() {
    apt-get update
    if [[ $UBUNTU_VERSION == 18.04 ]]; then
      # gpg-agent is not available by default on 18.04
      apt-get install -y --no-install-recommends gpg-agent
    fi
-    if [[ $UBUNTU_VERSION == 20.04 ]]; then
-      # gpg-agent is not available by default on 20.04
-      apt-get install -y --no-install-recommends gpg-agent
-    fi
    apt-get install -y kmod
    apt-get install -y wget

@ -54,13 +42,6 @@ install_ubuntu() {
    apt-get install -y libc++1
    apt-get install -y libc++abi1

-    if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
-        # Add amdgpu repository
-        UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
-        local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu"
-        echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
-    fi
-
    ROCM_REPO="ubuntu"
    if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
        ROCM_REPO="xenial"
@ -68,8 +49,7 @@ install_ubuntu() {

    # Add rocm repository
    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
-    local rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
-    echo "deb [arch=amd64] ${rocm_baseurl} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
+    echo "deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
    apt-get update --allow-insecure-repositories

    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
@ -106,24 +86,11 @@ install_centos() {
  yum install -y epel-release
  yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`

-  if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then
-      # Add amdgpu repository
-      local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64"
-      echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
-      echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
-      echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
-      echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
-      echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
-      echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
-  fi
-
-  local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
  echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
  echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
-  echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
+  echo "baseurl=http://repo.radeon.com/rocm/yum/${ROCM_VERSION}" >> /etc/yum.repos.d/rocm.repo
  echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
-  echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
-  echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo
+  echo "gpgcheck=0" >> /etc/yum.repos.d/rocm.repo

  yum update -y

--- a/.circleci/docker/common/install_tensorrt.sh
+++ b/.circleci/docker/common/install_tensorrt.sh
@ -1,7 +0,0 @@
-#!/bin/bash
-
-if [ -n "$TENSORRT_VERSION" ]; then
-    python3 -m pip install --upgrade setuptools pip
-    python3 -m pip install nvidia-pyindex
-    python3 -m pip install nvidia-tensorrt==${TENSORRT_VERSION} --extra-index-url https://pypi.ngc.nvidia.com
-fi
--- a/.circleci/docker/ubuntu-cuda/Dockerfile
+++ b/.circleci/docker/ubuntu-cuda/Dockerfile
@ -1,14 +1,12 @@
 ARG UBUNTU_VERSION
 ARG CUDA_VERSION
 ARG CUDNN_VERSION
-ARG IMAGE_NAME
-
-FROM ${IMAGE_NAME}
-
-ARG UBUNTU_VERSION
-ARG CUDA_VERSION
-ARG CUDNN_VERSION

+FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+
+ARG UBUNTU_VERSION
+ARG CUDA_VERSION
+ARG CUDNN_VERSION

 ENV DEBIAN_FRONTEND noninteractive

@ -26,7 +24,7 @@ ARG KATEX
 ADD ./common/install_katex.sh install_katex.sh
 RUN bash ./install_katex.sh && rm install_katex.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
@ -67,12 +65,6 @@ ADD ./common/install_openssl.sh install_openssl.sh
 ENV OPENSSL_ROOT_DIR /opt/openssl
 RUN bash ./install_openssl.sh

-# (optional) Install TensorRT
-ARG TENSORRT_VERSION
-ADD ./common/install_tensorrt.sh install_tensorrt.sh
-RUN if [ -n "${TENSORRT_VERSION}" ]; then bash ./install_tensorrt.sh; fi
-RUN rm install_tensorrt.sh
-
 # (optional) Install non-default CMake version
 ARG CMAKE_VERSION
 ADD ./common/install_cmake.sh install_cmake.sh
@ -83,7 +75,7 @@ RUN rm install_cmake.sh
 ADD ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
 RUN bash ./install_cache.sh && rm install_cache.sh
-ENV CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
+ENV CUDA_NVCC_EXECUTABLE=/opt/cache/lib/nvcc

 # Add jni.h for java host build
 ADD ./common/install_jni.sh install_jni.sh
@ -102,17 +94,9 @@ ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
 ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
-ENV CUDA_PATH /usr/local/cuda

 # Install LLVM dev version (Defined in the pytorch/builder github repository)
 COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm

-# Hack for CUDA 11.5.0 image to install cudnn8 since cudnn8 is not included with CUDA 11.5 image
-# Also note cudnn 8.2.0.53 is labeled for cuda 11.3
-ARG INSTALL_CUDNN
-ADD ./common/install_cudnn8.sh install_cudnn8.sh
-RUN if [ -n "${INSTALL_CUDNN}" ]; then bash install_cudnn8.sh; fi
-RUN rm install_cudnn8.sh
-
 USER jenkins
 CMD ["bash"]
--- a/.circleci/docker/ubuntu-rocm/Dockerfile
+++ b/.circleci/docker/ubuntu-rocm/Dockerfile
@ -6,10 +6,6 @@ ARG UBUNTU_VERSION

 ENV DEBIAN_FRONTEND noninteractive

-# Set AMD gpu targets to build for
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-
 # Install common dependencies (so that this step can be cached separately)
 ARG EC2
 ADD ./common/install_base.sh install_base.sh
@ -25,7 +21,7 @@ RUN bash ./install_clang.sh && rm install_clang.sh
 ADD ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
--- a/.circleci/docker/ubuntu/Dockerfile
+++ b/.circleci/docker/ubuntu/Dockerfile
@ -33,7 +33,7 @@ ARG KATEX
 ADD ./common/install_katex.sh install_katex.sh
 RUN bash ./install_katex.sh && rm install_katex.sh

-# Install conda and other packages (e.g., numpy, pytest)
+# Install conda and other packages (e.g., numpy, coverage, pytest)
 ENV PATH /opt/conda/bin:$PATH
 ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
--- a/.circleci/ecr_gc_docker/Dockerfile
+++ b/.circleci/ecr_gc_docker/Dockerfile
@ -0,0 +1,13 @@
+FROM ubuntu:18.04
+
+RUN apt-get update && apt-get install -y python3-pip git && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log
+
+ADD requirements.txt /requirements.txt
+
+RUN pip3 install -r /requirements.txt
+
+ADD gc.py /usr/bin/gc.py
+
+ADD docker_hub.py /usr/bin/docker_hub.py
+
+ENTRYPOINT ["/usr/bin/gc.py"]
--- a/.circleci/ecr_gc_docker/docker_hub.py
+++ b/.circleci/ecr_gc_docker/docker_hub.py
@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+
+from collections import namedtuple
+
+import boto3
+import requests
+import os
+
+
+# Record describing one Docker Hub image tag; built by list_tags() and
+# rendered as one HTML table row by save_to_s3().
+IMAGE_INFO = namedtuple(
+    "IMAGE_INFO", ("repo", "tag", "size", "last_updated_at", "last_updated_by")
+)
+
+
+def build_access_token(username, passwordtr):
+    """Log in to Docker Hub and return an Authorization header dict.
+
+    Args:
+        username: Docker Hub account name.
+        passwordtr: Docker Hub password for ``username``. The parameter name
+            is kept unchanged for compatibility with existing callers.
+
+    Returns:
+        A dict of the form ``{"Authorization": "JWT <token>"}`` suitable for
+        passing as ``headers`` to ``requests``.
+
+    Raises:
+        requests.HTTPError: if the login request returns an error status.
+    """
+    r = requests.post(
+        "https://hub.docker.com/v2/users/login/",
+        # Send the argument that was passed in. The previous code read a
+        # module-level ``password`` name that only exists when this file is
+        # run as a script, so any other caller hit a NameError.
+        data={"username": username, "password": passwordtr},
+    )
+    r.raise_for_status()
+    token = r.json().get("token")
+    return {"Authorization": "JWT " + token}
+
+
+def list_repos(user, token):
+    """List the repositories Docker Hub returns for ``user``.
+
+    Args:
+        user: Docker Hub user or organisation name appended to the URL.
+        token: header dict sent with the request.
+
+    Returns:
+        A sorted list of ``"<user>/<name>"`` strings built from the
+        ``results`` entries of the response (empty if there are none).
+
+    Raises:
+        requests.HTTPError: if the request returns an error status.
+    """
+    response = requests.get(
+        "https://hub.docker.com/v2/repositories/" + user, headers=token
+    )
+    response.raise_for_status()
+    names = [
+        entry["user"] + "/" + entry["name"]
+        for entry in response.json().get("results", [])
+    ]
+    names.sort()
+    if names:
+        print("repos found:")
+        print("".join("\n\t" + name for name in names))
+    return names
+
+
+def list_tags(repo, token):
+    """Fetch the tags Docker Hub returns for one repository.
+
+    Args:
+        repo: repository path (``"<user>/<name>"``) inserted into the URL.
+        token: header dict sent with the request.
+
+    Returns:
+        A list of ``IMAGE_INFO`` records, one per entry in the response's
+        ``results`` list (empty if there are none).
+
+    Raises:
+        requests.HTTPError: if the request returns an error status.
+    """
+    url = "https://hub.docker.com/v2/repositories/" + repo + "/tags"
+    response = requests.get(url, headers=token)
+    response.raise_for_status()
+    images = []
+    for entry in response.json().get("results", []):
+        images.append(
+            IMAGE_INFO(
+                repo=repo,
+                tag=entry["name"],
+                size=entry["full_size"],
+                last_updated_at=entry["last_updated"],
+                last_updated_by=entry["last_updater_username"],
+            )
+        )
+    return images
+
+
+def save_to_s3(tags):
+    """Render ``tags`` as an HTML table and upload it to S3.
+
+    The page is written to bucket ``docker.pytorch.org`` under the key
+    ``docker_hub.html`` with a ``public-read`` ACL.
+
+    Args:
+        tags: iterable of ``IMAGE_INFO``-like records exposing ``repo``,
+            ``tag``, ``size``, ``last_updated_at`` and ``last_updated_by``.
+    """
+    client = boto3.client("s3")
+    row_template = (
+        "<tr><td>{repo}</td><td>{tag}</td><td>{size}</td>"
+        "<td>{last_updated_at}</td><td>{last_updated_by}</td></tr>"
+    )
+    # Build every row in a single join rather than repeated string +=.
+    table_content = "".join(
+        row_template.format(
+            repo=t.repo,
+            tag=t.tag,
+            size=t.size,
+            last_updated_at=t.last_updated_at,
+            last_updated_by=t.last_updated_by,
+        )
+        for t in tags
+    )
+    # Literal braces in the inline script are doubled because the template
+    # goes through str.format(). A stray "py" token that used to follow the
+    # ready() call (a JavaScript ReferenceError on page load) has been removed.
+    html_body = """
+    <html>
+        <head>
+            <link rel="stylesheet"
+                href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css"
+                integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh"
+                crossorigin="anonymous">
+            <link rel="stylesheet" type="text/css"
+                href="https://cdn.datatables.net/1.10.20/css/jquery.dataTables.css">
+            <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js">
+            </script>
+            <script type="text/javascript" charset="utf8"
+                src="https://cdn.datatables.net/1.10.20/js/jquery.dataTables.js"></script>
+            <title> docker image info</title>
+        </head>
+        <body>
+            <table class="table table-striped table-hover" id="docker">
+            <caption>Docker images on docker hub</caption>
+            <thead class="thead-dark">
+                <tr>
+                <th scope="col">repo</th>
+                <th scope="col">tag</th>
+                <th scope="col">size</th>
+                <th scope="col">last_updated_at</th>
+                <th scope="col">last_updated_by</th>
+                </tr>
+            </thead>
+            <tbody>
+                {table_content}
+            </tbody>
+            </table>
+        </body>
+        <script>
+            $(document).ready( function () {{
+                $('#docker').DataTable({{paging: false}});
+            }} );
+        </script>
+    </html>
+    """.format(
+        table_content=table_content
+    )
+    client.put_object(
+        Bucket="docker.pytorch.org",
+        ACL="public-read",
+        Key="docker_hub.html",
+        Body=html_body,
+        ContentType="text/html",
+    )
+
+
+if __name__ == "__main__":
+    # Credentials come from the environment; a missing variable yields None.
+    username = os.environ.get("DOCKER_HUB_USERNAME")
+    password = os.environ.get("DOCKER_HUB_PASSWORD")
+    token = build_access_token(username, password)
+    # Gather the tags of every listed "pytorch" repository, in listing order,
+    # then publish them as a single HTML page.
+    tags = [
+        image
+        for repo in list_repos("pytorch", token)
+        for image in list_tags(repo, token)
+    ]
+    save_to_s3(tags)
--- a/.circleci/ecr_gc_docker/gc.py
+++ b/.circleci/ecr_gc_docker/gc.py
@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+
+import argparse
+import boto3
+import datetime
+import pytz
+import re
+import sys
+
+
+def save_to_s3(project, data):
+    table_content = ""
+    client = boto3.client("s3")
+    for repo, tag, window, age, pushed in data:
+        table_content += "<tr><td>{repo}</td><td>{tag}</td><td>{window}</td><td>{age}</td><td>{pushed}</td></tr>".format(
+            repo=repo, tag=tag, window=window, age=age, pushed=pushed
+        )
+    html_body = """
+    <html>
+        <head>
+            <link rel="stylesheet"
+                href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css"
+                integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh"
+                crossorigin="anonymous">
+            <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.20/css/jquery.dataTables.css">
+            <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
+            <script type="text/javascript" charset="utf8" src="https://cdn.datatables.net/1.10.20/js/jquery.dataTables.js"></script>
+            <title>{project} nightly and permanent docker image info</title>
+        </head>
+        <body>
+            <table class="table table-striped table-hover" id="docker">
+            <thead class="thead-dark">
+                <tr>
+                <th scope="col">repo</th>
+                <th scope="col">tag</th>
+                <th scope="col">keep window</th>
+                <th scope="col">age</th>
+                <th scope="col">pushed at</th>
+                </tr>
+            </thead>
+            <tbody>
+                {table_content}
+            </tbody>
+            </table>
+        </body>
+        <script>
+            $(document).ready( function () {{
+                $('#docker').DataTable({{paging: false}});
+            }} );
+        </script>
+    </html>
+    """.format(
+        project=project, table_content=table_content
+    )
+
+    # for pytorch, file can be found at
+    # http://ossci-docker.s3-website.us-east-1.amazonaws.com/pytorch.html
+    # and later one we can config docker.pytorch.org to point to the location
+
+    client.put_object(
+        Bucket="docker.pytorch.org",
+        ACL="public-read",
+        Key="{project}.html".format(project=project),
+        Body=html_body,
+        ContentType="text/html",
+    )
+
+
+def repos(client):
+    paginator = client.get_paginator("describe_repositories")
+    pages = paginator.paginate(registryId="308535385114")
+    for page in pages:
+        for repo in page["repositories"]:
+            yield repo
+
+
+def images(client, repository):
+    paginator = client.get_paginator("describe_images")
+    pages = paginator.paginate(
+        registryId="308535385114", repositoryName=repository["repositoryName"]
+    )
+    for page in pages:
+        for image in page["imageDetails"]:
+            yield image
+
+
+# Command-line interface. Parsing happens at module level, so the options are
+# read as soon as this file is executed.
+parser = argparse.ArgumentParser(description="Delete old Docker tags from registry")
+parser.add_argument(
+    "--dry-run", action="store_true", help="Dry run; print tags that would be deleted"
+)
+parser.add_argument(
+    "--debug", action="store_true", help="Debug, print ignored / saved tags"
+)
+parser.add_argument(
+    "--keep-stable-days",
+    type=int,
+    default=14,
+    help="Days of stable Docker tags to keep (non per-build images)",
+)
+parser.add_argument(
+    "--keep-unstable-days",
+    type=int,
+    default=1,
+    help="Days of unstable Docker tags to keep (per-build images)",
+)
+parser.add_argument(
+    "--filter-prefix",
+    type=str,
+    default="",
+    help="Only run cleanup for repositories with this prefix",
+)
+parser.add_argument(
+    "--ignore-tags",
+    type=str,
+    default="",
+    help="Never cleanup these tags (comma separated)",
+)
+args = parser.parse_args()
+
+# --ignore-tags and --filter-prefix both default to "" but are treated as
+# mandatory: when either is empty the explanation below is printed and the
+# script exits with status 1.
+if not args.ignore_tags or not args.filter_prefix:
+    print(
+        """
+Missing required arguments --ignore-tags and --filter-prefix
+
+You must specify --ignore-tags and --filter-prefix to avoid accidentally
+pruning a stable Docker tag which is being actively used.  This will
+make you VERY SAD.  So pay attention.
+
+First, which filter-prefix do you want?  The list of valid prefixes
+is in jobs/private.groovy under the 'docker-registry-cleanup' job.
+You probably want either pytorch or caffe2.
+
+Second, which ignore-tags do you want?  It should be whatever the most
+up-to-date DockerVersion for the repository in question is.  Follow
+the imports of jobs/pytorch.groovy to find them.
+"""
+    )
+    sys.exit(1)
+
+# Module-level state derived from the parsed options.
+client = boto3.client("ecr", region_name="us-east-1")
+# Retention windows, expressed as timedeltas built from the day counts.
+stable_window = datetime.timedelta(days=args.keep_stable_days)
+unstable_window = datetime.timedelta(days=args.keep_unstable_days)
+# Timezone-aware current time in UTC.
+now = datetime.datetime.now(pytz.UTC)
+# --ignore-tags is a comma-separated string; split it into a list of tags.
+ignore_tags = args.ignore_tags.split(",")
+
+
+def chunks(chunkable, n):
+    """ Yield successive n-sized chunks from l.
+    """
+    for i in range(0, len(chunkable), n):
+        yield chunkable[i: i + n]
+
+
+SHA_PATTERN = re.compile(r'^[0-9a-f]{40}$')
+
+
+def looks_like_git_sha(tag):
+    """Returns a boolean to check if a tag looks like a git sha
+
+    For reference a sha1 is 40 characters with only 0-9a-f and contains no
+    "-" characters
+    """
+    return re.match(SHA_PATTERN, tag) is not None
+
+
+# Rows of (repository name, tag, window, age, pushed-at) that are later passed
+# to save_to_s3.
+stable_window_tags = []
+for repo in repos(client):
+    repositoryName = repo["repositoryName"]
+    # Only repositories whose name starts with --filter-prefix are processed.
+    if not repositoryName.startswith(args.filter_prefix):
+        continue
+
+    # Keep list of image digests to delete for this repository
+    digest_to_delete = []
+
+    for image in images(client, repo):
+        tags = image.get("imageTags")
+        # Images without a non-empty list of tags are left untouched.
+        if not isinstance(tags, (list,)) or len(tags) == 0:
+            continue
+        created = image["imagePushedAt"]
+        age = now - created
+        # for/else: the loop breaks at the first tag that must be kept; the
+        # else branch (scheduling the image for deletion) runs only when no
+        # tag caused a break.
+        for tag in tags:
+            # Tags that look like a git sha, consist only of digits, contain
+            # exactly four "-" or are listed in --ignore-tags get the stable
+            # window; every other tag gets the unstable window.
+            if any([
+                    looks_like_git_sha(tag),
+                    tag.isdigit(),
+                    tag.count("-") == 4,  # TODO: Remove, this no longer applies as tags are now built using a SHA1
+                    tag in ignore_tags]):
+                window = stable_window
+                # Record kept stable tags for the report; ignored tags are
+                # recorded with an empty window column.
+                if tag in ignore_tags:
+                    stable_window_tags.append((repositoryName, tag, "", age, created))
+                elif age < window:
+                    stable_window_tags.append((repositoryName, tag, window, age, created))
+            else:
+                window = unstable_window
+
+            # A single ignored or still-young tag keeps the whole image.
+            if tag in ignore_tags or age < window:
+                if args.debug:
+                    print("Ignoring {}:{} (age: {})".format(repositoryName, tag, age))
+                break
+        else:
+            for tag in tags:
+                print("{}Deleting {}:{} (age: {})".format("(dry run) " if args.dry_run else "", repositoryName, tag, age))
+            digest_to_delete.append(image["imageDigest"])
+    if args.dry_run:
+        if args.debug:
+            print("Skipping actual deletion, moving on...")
+    else:
+        # Issue batch delete for all images to delete for this repository
+        # Note that as of 2018-07-25, the maximum number of images you can
+        # delete in a single batch is 100, so chunk our list into batches of
+        # 100
+        for c in chunks(digest_to_delete, 100):
+            client.batch_delete_image(
+                registryId="308535385114",
+                repositoryName=repositoryName,
+                imageIds=[{"imageDigest": digest} for digest in c],
+            )
+
+        # NOTE(review): this call sits inside the non-dry-run branch of the
+        # per-repository loop, so the report is uploaded once per processed
+        # repository and never during a dry run - confirm this is intended.
+        save_to_s3(args.filter_prefix, stable_window_tags)
--- a/.circleci/ecr_gc_docker/requirements.txt
+++ b/.circleci/ecr_gc_docker/requirements.txt
@ -0,0 +1,3 @@
+boto3
+pytz
+requests
--- a/.circleci/generate_config_yml.py
+++ b/.circleci/generate_config_yml.py
@ -11,11 +11,17 @@ import sys
 from collections import namedtuple

 import cimodel.data.binary_build_definitions as binary_build_definitions
+import cimodel.data.pytorch_build_definitions as pytorch_build_definitions
+import cimodel.data.simple.android_definitions
 import cimodel.data.simple.binary_smoketest
 import cimodel.data.simple.docker_definitions
+import cimodel.data.simple.ios_definitions
+import cimodel.data.simple.macos_definitions
 import cimodel.data.simple.mobile_definitions
+import cimodel.data.simple.nightly_android
 import cimodel.data.simple.nightly_ios
 import cimodel.data.simple.anaconda_prune_defintions
+import cimodel.data.windows_build_definitions as windows_build_definitions
 import cimodel.lib.miniutils as miniutils
 import cimodel.lib.miniyaml as miniyaml

@ -72,15 +78,15 @@ class Header(object):
        for line in filter(None, lines):
            output_filehandle.write(line + "\n")

-def _for_all_items(items, functor) -> None:
-    if isinstance(items, list):
-        for item in items:
-            _for_all_items(item, functor)
-    if isinstance(items, dict) and len(items) == 1:
-        item_type, item = next(iter(items.items()))
-        functor(item_type, item)
-
 def filter_master_only_jobs(items):
+    def _for_all_items(items, functor) -> None:
+        if isinstance(items, list):
+            for item in items:
+                _for_all_items(item, functor)
+        if isinstance(items, dict) and len(items) == 1:
+            item_type, item = next(iter(items.items()))
+            functor(item_type, item)
+
    def _is_master_item(item):
        filters = item.get('filters', None)
        branches = filters.get('branches', None) if filters is not None else None
@ -118,37 +124,24 @@ def filter_master_only_jobs(items):
    _for_all_items(items, _save_requires_if_master)
    return _do_filtering(items)

-def generate_required_docker_images(items):
-    required_docker_images = set()
-
-    def _requires_docker_image(item_type, item):
-        requires = item.get('requires', None)
-        if not isinstance(requires, list):
-            return
-        for requirement in requires:
-            requirement = requirement.replace('"', '')
-            if requirement.startswith('docker-'):
-                required_docker_images.add(requirement)
-
-    _for_all_items(items, _requires_docker_image)
-    return required_docker_images

 def gen_build_workflows_tree():
    build_workflows_functions = [
+        cimodel.data.simple.docker_definitions.get_workflow_jobs,
+        pytorch_build_definitions.get_workflow_jobs,
+        cimodel.data.simple.macos_definitions.get_workflow_jobs,
+        cimodel.data.simple.android_definitions.get_workflow_jobs,
+        cimodel.data.simple.ios_definitions.get_workflow_jobs,
        cimodel.data.simple.mobile_definitions.get_workflow_jobs,
        cimodel.data.simple.binary_smoketest.get_workflow_jobs,
        cimodel.data.simple.nightly_ios.get_workflow_jobs,
+        cimodel.data.simple.nightly_android.get_workflow_jobs,
        cimodel.data.simple.anaconda_prune_defintions.get_workflow_jobs,
+        windows_build_definitions.get_windows_workflows,
        binary_build_definitions.get_post_upload_jobs,
        binary_build_definitions.get_binary_smoke_test_jobs,
    ]
    build_jobs = [f() for f in build_workflows_functions]
-    build_jobs.extend(
-        cimodel.data.simple.docker_definitions.get_workflow_jobs(
-            # sort for consistency
-            sorted(generate_required_docker_images(build_jobs))
-        )
-    )
    master_build_jobs = filter_master_only_jobs(build_jobs)

    binary_build_functions = [
@ -157,6 +150,11 @@ def gen_build_workflows_tree():
        binary_build_definitions.get_nightly_uploads,
    ]

+    slow_gradcheck_jobs = [
+        pytorch_build_definitions.get_workflow_jobs,
+        cimodel.data.simple.docker_definitions.get_workflow_jobs,
+    ]
+
    return {
        "workflows": {
            "binary_builds": {
@ -171,6 +169,10 @@ def gen_build_workflows_tree():
                "when": r"<< pipeline.parameters.run_master_build >>",
                "jobs": master_build_jobs,
            },
+            "slow_gradcheck_build": {
+                "when": r"<< pipeline.parameters.run_slow_gradcheck_build >>",
+                "jobs": [f(only_slow_gradcheck=True) for f in slow_gradcheck_jobs],
+            },
        }
    }

@ -185,6 +187,7 @@ YAML_SOURCES = [
    File("build-parameters/binary-build-params.yml"),
    File("build-parameters/promote-build-params.yml"),
    Header("Job specs"),
+    File("job-specs/pytorch-job-specs.yml"),
    File("job-specs/binary-job-specs.yml"),
    File("job-specs/job-specs-custom.yml"),
    File("job-specs/job-specs-promote.yml"),
@ -193,6 +196,8 @@ YAML_SOURCES = [
    File("job-specs/docker_jobs.yml"),
    Header("Workflows"),
    Treegen(gen_build_workflows_tree, 0),
+    File("workflows/workflows-scheduled-ci.yml"),
+    File("workflows/workflows-ecr-gc.yml"),
    File("workflows/workflows-promote.yml"),
 ]

--- a/.circleci/scripts/binary_checkout.sh
+++ b/.circleci/scripts/binary_checkout.sh
@ -61,7 +61,7 @@ git --no-pager log --max-count 1
 popd

 # Clone the Builder master repo
-retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
+retry git clone -q https://github.com/pytorch/builder.git -b release/1.10 "$BUILDER_ROOT"
 pushd "$BUILDER_ROOT"
 echo "Using builder from "
 git --no-pager log --max-count 1
--- a/.circleci/scripts/binary_ios_test.sh
+++ b/.circleci/scripts/binary_ios_test.sh
@ -27,4 +27,4 @@ if ! [ -x "$(command -v xcodebuild)" ]; then
    exit 1
 fi
 PROFILE=PyTorch_CI_2022
-ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}
+ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID} -f Accelerate,MetalPerformanceShaders,CoreML
--- a/.circleci/scripts/binary_ios_upload.sh
+++ b/.circleci/scripts/binary_ios_upload.sh
@ -23,23 +23,14 @@ do
    fi
 done
 lipo -i ${ZIP_DIR}/install/lib/*.a
-echo "BUILD_LITE_INTERPRETER: ${BUILD_LITE_INTERPRETER}"
 # copy the umbrella header and license
-if [ "${BUILD_LITE_INTERPRETER}" == "1" ]; then
-    cp ${PROJ_ROOT}/ios/LibTorch-Lite.h ${ZIP_DIR}/src/
-else
-    cp ${PROJ_ROOT}/ios/LibTorch.h ${ZIP_DIR}/src/
-fi
+cp ${PROJ_ROOT}/ios/LibTorch-Lite.h ${ZIP_DIR}/src/
 cp ${PROJ_ROOT}/LICENSE ${ZIP_DIR}/
 # zip the library
 export DATE="$(date -u +%Y%m%d)"
-export IOS_NIGHTLY_BUILD_VERSION="1.11.0.${DATE}"
-if [ "${BUILD_LITE_INTERPRETER}" == "1" ]; then
-    # libtorch_lite_ios_nightly_1.11.0.20210810.zip
-    ZIPFILE="libtorch_lite_ios_nightly_${IOS_NIGHTLY_BUILD_VERSION}.zip"
-else
-    ZIPFILE="libtorch_ios_nightly_build.zip"
-fi
+export IOS_NIGHTLY_BUILD_VERSION="1.10.0.${DATE}"
+# libtorch_lite_ios_nightly_1.10.0.20210810.zip
+ZIPFILE="libtorch_lite_ios_nightly_${IOS_NIGHTLY_BUILD_VERSION}.zip"
 cd ${ZIP_DIR}
 #for testing
 touch version.txt
@ -61,15 +52,13 @@ set +x
 # echo "AWS SECRET: ${AWS_SECRET_ACCESS_KEY}"
 aws s3 cp ${ZIPFILE} s3://ossci-ios-build/ --acl public-read

-if [ "${BUILD_LITE_INTERPRETER}" == "1" ]; then
-    # create a new LibTorch-Lite-Nightly.podspec from the template
-    echo "cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec"
-    cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
+# create a new LibTorch-Lite-Nightly.podspec from the template
+echo "cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec"
+cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec

-    # update pod version
-    sed -i '' -e "s/IOS_NIGHTLY_BUILD_VERSION/${IOS_NIGHTLY_BUILD_VERSION}/g" ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
-    cat ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
+# update pod version
+sed -i '' -e "s/IOS_NIGHTLY_BUILD_VERSION/${IOS_NIGHTLY_BUILD_VERSION}/g" ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
+cat ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec

-    # push the new LibTorch-Lite-Nightly.podspec to CocoaPods
-    pod trunk push --verbose --allow-warnings --use-libraries --skip-import-validation ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
-fi
+# push the new LibTorch-Lite-Nightly.podspec to CocoaPods
+pod trunk push --verbose --allow-warnings --use-libraries --skip-import-validation ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
--- a/.circleci/scripts/binary_linux_build.sh
+++ b/.circleci/scripts/binary_linux_build.sh
@ -11,7 +11,7 @@ NUM_CPUS=$(( $(nproc) - 2 ))
 # Defaults here for **binary** linux builds so they can be changed in one place
 export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}

-if [[ "${DESIRED_CUDA}" =~ cu11[0-9] ]]; then
+if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
  export BUILD_SPLIT_CUDA="ON"
 fi

--- a/.circleci/scripts/binary_linux_test.sh
+++ b/.circleci/scripts/binary_linux_test.sh
@ -1,24 +1,10 @@
 #!/bin/bash

-OUTPUT_SCRIPT=${OUTPUT_SCRIPT:-/home/circleci/project/ci_test_script.sh}
-
-# only source if file exists
-if [[ -f /home/circleci/project/env ]]; then
-  source /home/circleci/project/env
-fi
-cat >"${OUTPUT_SCRIPT}" <<EOL
+source /home/circleci/project/env
+cat >/home/circleci/project/ci_test_script.sh <<EOL
 # =================== The following code will be executed inside Docker container ===================
 set -eux -o pipefail

-retry () {
-    "\$@"  || (sleep 1 && "\$@") || (sleep 2 && "\$@")
-}
-
-# Source binary env file here if exists
-if [[ -e "${BINARY_ENV_FILE:-/nofile}" ]]; then
-  source "${BINARY_ENV_FILE:-/nofile}"
-fi
-
 python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)"

 # Set up Python
@ -37,23 +23,14 @@ fi

 EXTRA_CONDA_FLAGS=""
 NUMPY_PIN=""
-PROTOBUF_PACKAGE="defaults::protobuf"
-if [[ "\$python_nodot" = *310* ]]; then
-  EXTRA_CONDA_FLAGS="-c=conda-forge"
-  # There's an issue with conda channel priority where it'll randomly pick 1.19 over 1.20
-  # we set a lower boundary here just to be safe
-  NUMPY_PIN=">=1.21.2"
-  PROTOBUF_PACKAGE="protobuf>=3.19.0"
-fi
-
-if [[ "\$python_nodot" = *39*  ]]; then
+if [[ "\$python_nodot" = *39* ]]; then
  EXTRA_CONDA_FLAGS="-c=conda-forge"
  # There's an issue with conda channel priority where it'll randomly pick 1.19 over 1.20
  # we set a lower boundary here just to be safe
  NUMPY_PIN=">=1.20"
 fi

-if [[ "$DESIRED_CUDA" == "cu112" || "$DESIRED_CUDA" == "cu115" ]]; then
+if [[ "$DESIRED_CUDA" == "cu112" ]]; then
  EXTRA_CONDA_FLAGS="-c=conda-forge"
 fi

@ -82,7 +59,7 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
      ninja \
      dataclasses \
      typing-extensions \
-      ${PROTOBUF_PACKAGE} \
+      defaults::protobuf \
      six
    if [[ "$DESIRED_CUDA" == 'cpu' ]]; then
      retry conda install -c pytorch -y cpuonly
@ -115,4 +92,4 @@ EOL
 echo
 echo
 echo "The script that will run in the next step is:"
-cat "${OUTPUT_SCRIPT}"
+cat /home/circleci/project/ci_test_script.sh
--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@ -19,47 +19,39 @@ tagged_version() {
  fi
 }

-# These are only relevant for CircleCI
-# TODO: Remove these later once migrated fully to GHA
-if [[ -z ${IS_GHA:-} ]]; then
-  # We need to write an envfile to persist these variables to following
-  # steps, but the location of the envfile depends on the circleci executor
-  if [[ "$(uname)" == Darwin ]]; then
-    # macos executor (builds and tests)
-    workdir="/Users/distiller/project"
-  elif [[ "$OSTYPE" == "msys" ]]; then
-    # windows executor (builds and tests)
-    workdir="/c/w"
-  elif [[ -d "/home/circleci/project" ]]; then
-    # machine executor (binary tests)
-    workdir="/home/circleci/project"
-  else
-    # docker executor (binary builds)
-    workdir="/"
-  fi
-  envfile="$workdir/env"
-  touch "$envfile"
-  chmod +x "$envfile"
+# We need to write an envfile to persist these variables to following
+# steps, but the location of the envfile depends on the circleci executor
+if [[ "$(uname)" == Darwin ]]; then
+  # macos executor (builds and tests)
+  workdir="/Users/distiller/project"
+elif [[ "$OSTYPE" == "msys" ]]; then
+  # windows executor (builds and tests)
+  workdir="/c/w"
+elif [[ -d "/home/circleci/project" ]]; then
+  # machine executor (binary tests)
+  workdir="/home/circleci/project"
+else
+  # docker executor (binary builds)
+  workdir="/"
+fi
+envfile="$workdir/env"
+touch "$envfile"
+chmod +x "$envfile"

-  # Parse the BUILD_ENVIRONMENT to package type, python, and cuda
-  configs=($BUILD_ENVIRONMENT)
-  export PACKAGE_TYPE="${configs[0]}"
-  export DESIRED_PYTHON="${configs[1]}"
-  export DESIRED_CUDA="${configs[2]}"
-  if [[ "${BUILD_FOR_SYSTEM:-}" == "windows" ]]; then
-    export DESIRED_DEVTOOLSET=""
-    export LIBTORCH_CONFIG="${configs[3]:-}"
-    if [[ "$LIBTORCH_CONFIG" == 'debug' ]]; then
-      export DEBUG=1
-    fi
-  else
-    export DESIRED_DEVTOOLSET="${configs[3]:-}"
+# Parse the BUILD_ENVIRONMENT to package type, python, and cuda
+configs=($BUILD_ENVIRONMENT)
+export PACKAGE_TYPE="${configs[0]}"
+export DESIRED_PYTHON="${configs[1]}"
+export DESIRED_CUDA="${configs[2]}"
+if [[ "${BUILD_FOR_SYSTEM:-}" == "windows" ]]; then
+  export DESIRED_DEVTOOLSET=""
+  export LIBTORCH_CONFIG="${configs[3]:-}"
+  if [[ "$LIBTORCH_CONFIG" == 'debug' ]]; then
+    export DEBUG=1
  fi
 else
-  envfile=${BINARY_ENV_FILE:-/tmp/env}
-  workdir="/pytorch"
+  export DESIRED_DEVTOOLSET="${configs[3]:-}"
 fi
-
 if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
  export BUILD_PYTHONLESS=1
 fi
@ -93,7 +85,7 @@ PIP_UPLOAD_FOLDER='nightly/'
 # We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
 export DATE="$(date -u +%Y%m%d)"
 #TODO: We should be pulling semver version from the base version.txt
-BASE_BUILD_VERSION="1.11.0.dev$DATE"
+BASE_BUILD_VERSION="1.10.0.dev$DATE"
 # Change BASE_BUILD_VERSION to git tag when on a git tag
 # Use 'git -C' to make doubly sure we're in the correct directory for checking
 # the git tag
@ -139,24 +131,24 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then
  fi
 fi

-cat >"$envfile" <<EOL
+cat >>"$envfile" <<EOL
 # =================== The following code will be executed inside Docker container ===================
 export TZ=UTC
 echo "Running on $(uname -a) at $(date)"

 export PACKAGE_TYPE="$PACKAGE_TYPE"
-export DESIRED_PYTHON="${DESIRED_PYTHON:-}"
+export DESIRED_PYTHON="$DESIRED_PYTHON"
 export DESIRED_CUDA="$DESIRED_CUDA"
 export LIBTORCH_VARIANT="${LIBTORCH_VARIANT:-}"
 export BUILD_PYTHONLESS="${BUILD_PYTHONLESS:-}"
-export DESIRED_DEVTOOLSET="${DESIRED_DEVTOOLSET:-}"
+export DESIRED_DEVTOOLSET="$DESIRED_DEVTOOLSET"
 if [[ "${BUILD_FOR_SYSTEM:-}" == "windows" ]]; then
  export LIBTORCH_CONFIG="${LIBTORCH_CONFIG:-}"
  export DEBUG="${DEBUG:-}"
 fi

 export DATE="$DATE"
-export NIGHTLIES_DATE_PREAMBLE=1.11.0.dev
+export NIGHTLIES_DATE_PREAMBLE=1.10.0.dev
 export PYTORCH_BUILD_VERSION="$PYTORCH_BUILD_VERSION"
 export PYTORCH_BUILD_NUMBER="$PYTORCH_BUILD_NUMBER"
 export OVERRIDE_PACKAGE_VERSION="$PYTORCH_BUILD_VERSION"
@ -164,7 +156,6 @@ export OVERRIDE_PACKAGE_VERSION="$PYTORCH_BUILD_VERSION"
 # TODO: We don't need this anymore IIUC
 export TORCH_PACKAGE_NAME='torch'
 export TORCH_CONDA_BUILD_FOLDER='pytorch-nightly'
-export ANACONDA_USER='pytorch'

 export USE_FBGEMM=1
 export JAVA_HOME=$JAVA_HOME
@ -172,6 +163,23 @@ export BUILD_JNI=$BUILD_JNI
 export PIP_UPLOAD_FOLDER="$PIP_UPLOAD_FOLDER"
 export DOCKER_IMAGE="$DOCKER_IMAGE"

+export workdir="$workdir"
+export MAC_PACKAGE_WORK_DIR="$workdir"
+if [[ "$OSTYPE" == "msys" ]]; then
+  export PYTORCH_ROOT="$workdir/p"
+  export BUILDER_ROOT="$workdir/b"
+else
+  export PYTORCH_ROOT="$workdir/pytorch"
+  export BUILDER_ROOT="$workdir/builder"
+fi
+export MINICONDA_ROOT="$workdir/miniconda"
+export PYTORCH_FINAL_PACKAGE_DIR="$workdir/final_pkgs"
+
+export CIRCLE_TAG="${CIRCLE_TAG:-}"
+export CIRCLE_SHA1="$CIRCLE_SHA1"
+export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
+export CIRCLE_BRANCH="$CIRCLE_BRANCH"
+export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"

 export USE_GOLD_LINKER="${USE_GOLD_LINKER}"
 export USE_GLOO_WITH_OPENSSL="ON"
@ -179,42 +187,6 @@ export USE_WHOLE_CUDNN="${USE_WHOLE_CUDNN}"
 # =================== The above code will be executed inside Docker container ===================
 EOL

-# nproc doesn't exist on darwin
-if [[ "$(uname)" != Darwin ]]; then
-  # Because most Circle executors only have 20 CPUs, using more causes OOMs w/ Ninja and nvcc parallelization
-  MEMORY_LIMIT_MAX_JOBS=18
-  NUM_CPUS=$(( $(nproc) - 2 ))
-
-  # Defaults here for **binary** linux builds so they can be changed in one place
-  export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}
-
-  cat >>"$envfile" <<EOL
-  export MAX_JOBS="${MAX_JOBS}"
-EOL
-fi
-
-if [[ -z "${IS_GHA:-}" ]]; then
-  cat >>"$envfile" <<EOL
-  export workdir="$workdir"
-  export MAC_PACKAGE_WORK_DIR="$workdir"
-  if [[ "$OSTYPE" == "msys" ]]; then
-    export PYTORCH_ROOT="$workdir/p"
-    export BUILDER_ROOT="$workdir/b"
-  else
-    export PYTORCH_ROOT="$workdir/pytorch"
-    export BUILDER_ROOT="$workdir/builder"
-  fi
-  export MINICONDA_ROOT="$workdir/miniconda"
-  export PYTORCH_FINAL_PACKAGE_DIR="$workdir/final_pkgs"
-
-  export CIRCLE_TAG="${CIRCLE_TAG:-}"
-  export CIRCLE_SHA1="$CIRCLE_SHA1"
-  export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
-  export CIRCLE_BRANCH="$CIRCLE_BRANCH"
-  export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
-EOL
-fi
-
 echo 'retry () {' >> "$envfile"
 echo '    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)' >> "$envfile"
 echo '}' >> "$envfile"
--- a/.circleci/scripts/binary_upload.sh
+++ b/.circleci/scripts/binary_upload.sh
@ -63,10 +63,6 @@ s3_upload() {
  )
 }

-# Install dependencies (should be a no-op if previously installed)
-conda install -yq anaconda-client
-pip install -q awscli
-
 case "${PACKAGE_TYPE}" in
  conda)
    conda_upload
--- a/.circleci/scripts/binary_windows_build.sh
+++ b/.circleci/scripts/binary_windows_build.sh
@ -10,8 +10,8 @@ export SCCACHE_BUCKET=ossci-compiler-cache-windows
 export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
 export VC_YEAR=2019

-if [[ "${DESIRED_CUDA}" == *"cu11"* ]]; then
-    export BUILD_SPLIT_CUDA=ON
+if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
+    export BUILD_SPLIT_CUDA="ON"
 fi

 echo "Free Space for CUDA DEBUG BUILD"
--- a/.circleci/scripts/cpp_doc_push_script.sh
+++ b/.circleci/scripts/cpp_doc_push_script.sh
@ -65,6 +65,7 @@ cp torch/_utils_internal.py tools/shared

 # Generate PyTorch files
 time python tools/setup_helpers/generate_code.py \
+  --declarations-path build/aten/src/ATen/Declarations.yaml \
  --native-functions-path aten/src/ATen/native/native_functions.yaml \
  --nn-path aten/src/

@ -96,12 +97,8 @@ git status
 git config user.email "soumith+bot@pytorch.org"
 git config user.name "pytorchbot"
 # If there aren't changes, don't make a commit; push is no-op
-git commit -m "Generate C++ docs from pytorch/pytorch@${GITHUB_SHA}" || true
+git commit -m "Generate C++ docs from pytorch/pytorch@$CIRCLE_SHA1" || true
 git status

-if [[ "${WITH_PUSH:-}" == true ]]; then
-  git push -u origin
-fi
-
 popd
 # =================== The above code **should** be executed inside Docker container ===================
--- a/.circleci/scripts/python_doc_push_script.sh
+++ b/.circleci/scripts/python_doc_push_script.sh
@ -131,12 +131,8 @@ git status
 git config user.email "soumith+bot@pytorch.org"
 git config user.name "pytorchbot"
 # If there aren't changes, don't make a commit; push is no-op
-git commit -m "Generate Python docs from pytorch/pytorch@${GITHUB_SHA}" || true
+git commit -m "Generate Python docs from pytorch/pytorch@$CIRCLE_SHA1" || true
 git status

-if [[ "${WITH_PUSH:-}" == true ]]; then
-  git push -u origin "${branch}"
-fi
-
 popd
 # =================== The above code **should** be executed inside Docker container ===================
--- a/.circleci/scripts/setup_ci_environment.sh
+++ b/.circleci/scripts/setup_ci_environment.sh
@ -32,7 +32,7 @@ if ! command -v aws >/dev/null; then
 fi

 if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then
-  DRIVER_FN="NVIDIA-Linux-x86_64-495.44.run"
+  DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
  wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN"
  sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false)
  nvidia-smi
--- a/.circleci/scripts/windows_cuda_install.sh
+++ b/.circleci/scripts/windows_cuda_install.sh
@ -11,17 +11,13 @@ case ${CUDA_VERSION} in
        cuda_install_packages="nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2"
        ;;
    11.1)
-        cuda_installer_name="cuda_11.1.1_456.81_win10"
+        cuda_installer_name="cuda_11.1.0_456.43_win10"
        cuda_install_packages="nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
        ;;
    11.3)
        cuda_installer_name="cuda_11.3.0_465.89_win10"
        cuda_install_packages="thrust_11.3 nvcc_11.3 cuobjdump_11.3 nvprune_11.3 nvprof_11.3 cupti_11.3 cublas_11.3 cublas_dev_11.3 cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3"
        ;;
-    11.5)
-        cuda_installer_name="cuda_11.5.0_496.13_win10"
-        cuda_install_packages="thrust_11.5 nvcc_11.5 cuobjdump_11.5 nvprune_11.5 nvprof_11.5 cupti_11.5 cublas_11.5 cublas_dev_11.5 cudart_11.5 cufft_11.5 cufft_dev_11.5 curand_11.5 curand_dev_11.5 cusolver_11.5 cusolver_dev_11.5 cusparse_11.5 cusparse_dev_11.5 npp_11.5 npp_dev_11.5 nvrtc_11.5 nvrtc_dev_11.5 nvml_dev_11.5"
-        ;;
    *)
        echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
        exit 1
--- a/.circleci/scripts/windows_cudnn_install.sh
+++ b/.circleci/scripts/windows_cudnn_install.sh
@ -19,9 +19,6 @@ case ${CUDA_VERSION} in
    11.3)
        archive_version="v8.2.0.53"
        ;;
-    11.5)
-        archive_version="v8.2.0.53"
-        ;;
    *)
        echo "CUDA_VERSION: ${CUDA_VERSION} not supported yet"
        exit 1
--- a/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
+++ b/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
@ -26,6 +26,24 @@ pytorch_params: &pytorch_params
    CI_MASTER: << pipeline.parameters.run_master_build >>
  resource_class: << parameters.resource_class >>

+# Reusable parameter/environment block, exposed as the YAML anchor
+# pytorch_android_params. op_list and lite_interpreter are forwarded to the
+# SELECTED_OP_LIST and BUILD_LITE_INTERPRETER environment variables.
+# NOTE(review): build_environment is declared as a parameter, but
+# BUILD_ENVIRONMENT below is a fixed value - confirm that is intended.
+pytorch_android_params: &pytorch_android_params
+  parameters:
+    build_environment:
+      type: string
+      default: ""
+    op_list:
+      type: string
+      default: ""
+    lite_interpreter:
+      type: string
+      default: "1"
+  environment:
+    BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single
+    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
+    PYTHON_VERSION: "3.6"
+    SELECTED_OP_LIST: << parameters.op_list >>
+    BUILD_LITE_INTERPRETER: << parameters.lite_interpreter >>
+
 pytorch_ios_params: &pytorch_ios_params
  parameters:
    build_environment:
--- a/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
@ -1,4 +1,3 @@
-jobs:
  binary_linux_build:
    <<: *binary_linux_build_params
    steps:
--- a/.circleci/verbatim-sources/job-specs/docker_jobs.yml
+++ b/.circleci/verbatim-sources/job-specs/docker_jobs.yml
@ -54,3 +54,61 @@
              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
              set -x
              cd .circleci/docker && ./build_docker.sh
+  docker_for_ecr_gc_build_job:
+      machine:
+        image: ubuntu-2004:202104-01
+      steps:
+        - checkout
+        - run:
+            name: build_docker_image_for_ecr_gc
+            no_output_timeout: "1h"
+            command: |
+              cd .circleci/ecr_gc_docker
+              docker build . -t 308535385114.dkr.ecr.us-east-1.amazonaws.com/gc/ecr
+              set +x
+              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
+              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
+              export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
+              export AWS_REGION=us-east-1
+              aws ecr get-login-password --region $AWS_REGION|docker login --username AWS \
+                       --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
+              set -x
+              docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/gc/ecr
+  ecr_gc_job:
+      parameters:
+        project:
+          type: string
+          default: "pytorch"
+        tags_to_keep:  # comma-separated values
+          type: string
+      environment:
+        PROJECT: << parameters.project >>
+        # TODO: Remove legacy image tags once we feel comfortable with new docker image tags
+        IMAGE_TAG: << parameters.tags_to_keep >>
+      docker:
+        - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/gc/ecr
+          aws_auth:
+            aws_access_key_id: ${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
+            aws_secret_access_key: ${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
+
+      steps:
+        - checkout
+        - run:
+            # NOTE: see 'docker_build_job' for how these tags actually get built
+            name: dynamically generate tags to keep
+            no_output_timeout: "1h"
+            command: |
+              GENERATED_IMAGE_TAG=$(\
+                git log --oneline --pretty='%H' .circleci/docker \
+                  | xargs -I '{}' git rev-parse '{}:.circleci/docker' \
+                  | paste -sd "," -)
+              echo "export GENERATED_IMAGE_TAG='${GENERATED_IMAGE_TAG}'" >> ${BASH_ENV}
+        - run:
+            name: garbage collecting for ecr images
+            no_output_timeout: "1h"
+            command: |
+              set +x
+              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
+              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
+              set -x
+              /usr/bin/gc.py --filter-prefix ${PROJECT}  --ignore-tags "${IMAGE_TAG},${GENERATED_IMAGE_TAG}"
--- a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
@ -27,7 +27,7 @@
  pytorch_python_doc_build:
    environment:
      BUILD_ENVIRONMENT: pytorch-python-doc-push
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4"
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -73,7 +73,7 @@
  pytorch_cpp_doc_build:
    environment:
      BUILD_ENVIRONMENT: pytorch-cpp-doc-push
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4"
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -213,7 +213,7 @@
          command: |
            set -ex
            source /Users/distiller/workspace/miniconda3/bin/activate
-            python3 -m pip install boto3==1.19.12
+            pip install boto3

            export IN_CI=1
            export JOB_BASE_NAME=$CIRCLE_JOB
@ -253,7 +253,7 @@
    environment:
      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
+      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -342,7 +342,7 @@
    environment:
      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-publish-snapshot
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
+      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -378,7 +378,7 @@
    environment:
      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-only-x86_32
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-      PYTHON_VERSION: "3.7"
+      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
      image: ubuntu-2004:202104-01
@ -416,6 +416,43 @@
        path: ~/workspace/build_android_x86_32_artifacts/artifacts.tgz
        destination: artifacts.tgz

+  pytorch_android_gradle_custom_build_single:
+    <<: *pytorch_android_params
+    resource_class: large
+    machine:
+      image: ubuntu-2004:202104-01
+    steps:
+    - checkout
+    - calculate_docker_image_tag
+    - setup_linux_system_environment
+    - checkout
+    - calculate_docker_image_tag
+    - setup_ci_environment
+    - run:
+        name: pytorch android gradle custom build single architecture (for PR)
+        no_output_timeout: "1h"
+        command: |
+          set -e
+          # Unlike other gradle jobs, it's not worth building libtorch in a separate CI job and sharing it via docker, because:
+          # 1) Not shareable: it's a custom selective build, which is different from the default libtorch mobile build;
+          # 2) Not parallelizable by architecture: it only builds libtorch for one architecture.
+
+          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}:${DOCKER_TAG}"
+          time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
+
+          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
+          VOLUME_MOUNTS="-v /home/circleci/project/:/var/lib/jenkins/workspace"
+          export id=$(docker run --env-file "${BASH_ENV}" ${VOLUME_MOUNTS} --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
+
+          export COMMAND='((echo "export GRADLE_OFFLINE=1" && echo "export BUILD_LITE_INTERPRETER=${BUILD_LITE_INTERPRETER}" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+
+          # Skip docker push as this job is purely for size analysis purposes.
+          # Result binaries are already in `/home/circleci/project/` as it's mounted instead of copied.
+
+    - upload_binary_size_for_android_build:
+        build_type: custom-build-single
+
  pytorch_ios_build:
    <<: *pytorch_ios_params
    macos:
@ -484,7 +521,6 @@
            echo "IOS_PLATFORM: ${IOS_PLATFORM}"
            echo "USE_PYTORCH_METAL": "${USE_METAL}"
            echo "BUILD_LITE_INTERPRETER": "${BUILD_LITE_INTERPRETER}"
-            echo "USE_COREML_DELEGATE": "${USE_COREML_DELEGATE}"

            #check the custom build flag
            echo "SELECTED_OP_LIST: ${SELECTED_OP_LIST}"
@ -493,7 +529,6 @@
            fi
            export IOS_ARCH=${IOS_ARCH}
            export IOS_PLATFORM=${IOS_PLATFORM}
-            export USE_COREML_DELEGATE=${USE_COREML_DELEGATE}
            if [ ${IOS_PLATFORM} != "SIMULATOR" ]; then
              export USE_PYTORCH_METAL=${USE_METAL}
            fi
@ -533,32 +568,20 @@
            PROJ_ROOT=/Users/distiller/project
            source ~/anaconda/bin/activate
            # use the pytorch nightly build to generate models
-            pip3 install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+            conda install pytorch torchvision -c pytorch-nightly --yes
            # generate models for different backends
            cd ${PROJ_ROOT}/ios/TestApp/benchmark
            mkdir -p ../models
-            if [ ${USE_COREML_DELEGATE} == 1 ]; then
-              pip install coremltools==5.0b5
-              pip install six
-              python coreml_backend.py
-            else
-              python trace_model.py
-            fi
+            python trace_model.py
            if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
-              echo "Setting up the TestApp for LiteInterpreter"
              ruby setup.rb --lite 1
            else
-              echo "Setting up the TestApp for Full JIT"
              ruby setup.rb
            fi
            cd ${PROJ_ROOT}/ios/TestApp
            instruments -s -devices
            if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
-              if [ ${USE_COREML_DELEGATE} == 1 ]; then
-                fastlane scan --only_testing TestAppTests/TestAppTests/testCoreML
-              else
-                fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
-              fi
+              fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
            else
              fastlane scan --only_testing TestAppTests/TestAppTests/testFullJIT
            fi
@ -660,7 +683,7 @@
  pytorch_doc_test:
    environment:
      BUILD_ENVIRONMENT: pytorch-doc-test
-      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4"
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: medium
    machine:
      image: ubuntu-2004:202104-01
--- a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@ -0,0 +1,400 @@
+jobs:
+  pytorch_linux_build:
+    <<: *pytorch_params
+    machine:
+      image: ubuntu-2004:202104-01
+    steps:
+    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+    - checkout
+    - calculate_docker_image_tag
+    - setup_linux_system_environment
+    - optional_merge_target_branch
+    - setup_ci_environment
+    - run:
+        name: Build
+        no_output_timeout: "1h"
+        command: |
+          set -e
+          if [[ ${BUILD_ENVIRONMENT} == *"pure_torch"* ]]; then
+            echo 'BUILD_CAFFE2=OFF' >> "${BASH_ENV}"
+          fi
+          if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
+            echo 'ATEN_THREADING=TBB' >> "${BASH_ENV}"
+            echo 'USE_TBB=1' >> "${BASH_ENV}"
+          elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
+            echo 'ATEN_THREADING=NATIVE' >> "${BASH_ENV}"
+          fi
+          echo "Parallel backend flags: "${PARALLEL_FLAGS}
+          # Pull Docker image and run build
+          echo "DOCKER_IMAGE: "${DOCKER_IMAGE}:${DOCKER_TAG}
+          time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
+          export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})
+
+          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
+
+          docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
+
+          export COMMAND='((echo "sudo chown -R jenkins workspace && export JOB_BASE_NAME="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
+
+          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+
+          # Copy dist folder back
+          docker cp $id:/var/lib/jenkins/workspace/dist /home/circleci/project/. || echo "Dist folder not found"
+
+          # Push intermediate Docker image for next phase to use
+          if [ -z "${BUILD_ONLY}" ]; then
+            # Note [Special build images]
+            # The xla build uses the same docker image as
+            # pytorch_linux_bionic_py3_6_clang9_build. In the push step, we have to
+            # distinguish between them so the test can pick up the correct image.
+            output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+            if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-xla
+            elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-libtorch
+            elif [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-paralleltbb
+            elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-parallelnative
+            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-x86_64"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-android-x86_64
+            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-arm-v7a"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-android-arm-v7a
+            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-arm-v8a"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-android-arm-v8a
+            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-x86_32"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-android-x86_32
+            elif [[ ${BUILD_ENVIRONMENT} == *"android-ndk-r19c-vulkan-x86_32"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-android-vulkan-x86_32
+            elif [[ ${BUILD_ENVIRONMENT} == *"vulkan-linux"* ]]; then
+              export COMMIT_DOCKER_IMAGE=$output_image-vulkan
+            else
+              export COMMIT_DOCKER_IMAGE=$output_image
+            fi
+            docker commit "$id" ${COMMIT_DOCKER_IMAGE}
+            time docker push ${COMMIT_DOCKER_IMAGE}
+          fi
+    - run:
+        name: upload build & binary data
+        no_output_timeout: "5m"
+        command: |
+            cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
+            python3 -mpip install requests && \
+            SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
+            python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
+    - store_artifacts:
+        path: /home/circleci/project/dist
+
+  pytorch_linux_test:
+    <<: *pytorch_params
+    machine:
+      image: ubuntu-2004:202104-01
+    steps:
+    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
+    - checkout
+    - calculate_docker_image_tag
+    - setup_linux_system_environment
+    - setup_ci_environment
+    - run:
+        name: Download Docker image
+        no_output_timeout: "90m"
+        command: |
+          set -e
+          export PYTHONUNBUFFERED=1
+          if [[ "${DOCKER_IMAGE}" == *rocm3.9* ]]; then
+            export DOCKER_TAG="f3d89a32912f62815e4feaeed47e564e887dffd6"
+          fi
+          # See Note [Special build images]
+          output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+          if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
+            export COMMIT_DOCKER_IMAGE=$output_image-xla
+          elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
+            export COMMIT_DOCKER_IMAGE=$output_image-libtorch
+          elif [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
+            export COMMIT_DOCKER_IMAGE=$output_image-paralleltbb
+          elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
+            export COMMIT_DOCKER_IMAGE=$output_image-parallelnative
+          elif [[ ${BUILD_ENVIRONMENT} == *"vulkan-linux"* ]]; then
+            export COMMIT_DOCKER_IMAGE=$output_image-vulkan
+          else
+            export COMMIT_DOCKER_IMAGE=$output_image
+          fi
+          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
+
+          if [[ ${BUILD_ENVIRONMENT} == *"paralleltbb"* ]]; then
+            echo 'ATEN_THREADING=TBB' >> "${BASH_ENV}"
+            echo 'USE_TBB=1' >> "${BASH_ENV}"
+          elif [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
+            echo 'ATEN_THREADING=NATIVE' >> "${BASH_ENV}"
+          fi
+          echo "Parallel backend flags: "${PARALLEL_FLAGS}
+
+          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
+
+          # TODO: Make this less painful
+          if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
+            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all --shm-size=2g -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+          elif [[ ${BUILD_ENVIRONMENT} == *"rocm"* ]]; then
+            hostname
+            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=8g --ipc=host --device /dev/kfd --device /dev/dri --group-add video -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+          else
+            export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=1g --ipc=host -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
+          fi
+          echo "id=${id}" >> "${BASH_ENV}"
+
+    - run:
+        name: Check for no AVX instruction by default
+        no_output_timeout: "20m"
+        command: |
+          set -e
+          is_vanilla_build() {
+            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-bionic-py3.6-clang9-test" ]; then
+              return 0
+            fi
+            if [ "${BUILD_ENVIRONMENT}" == "pytorch-linux-xenial-py3.6-gcc5.4-test" ]; then
+              return 0
+            fi
+            return 1
+          }
+
+          if is_vanilla_build; then
+            echo "apt-get update || apt-get install libgnutls30" | docker exec -u root -i "$id" bash
+            echo "apt-get install -y qemu-user gdb" | docker exec -u root -i "$id" bash
+            echo "cd workspace/build; qemu-x86_64 -g 2345 -cpu Broadwell -E ATEN_CPU_CAPABILITY=default ./bin/basic --gtest_filter=BasicTest.BasicTestCPU & gdb ./bin/basic -ex 'set pagination off' -ex 'target remote :2345' -ex 'continue' -ex 'bt' -ex='set confirm off' -ex 'quit \$_isvoid(\$_exitcode)'" | docker exec -u jenkins -i "$id" bash
+          else
+            echo "Skipping for ${BUILD_ENVIRONMENT}"
+          fi
+    - run:
+        name: Test
+        no_output_timeout: "90m"
+        command: |
+          set -e
+
+          cat >docker_commands.sh \<<EOL
+          # =================== The following code will be executed inside Docker container ===================
+          set -ex
+          export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
+          export JOB_BASE_NAME="$CIRCLE_JOB"
+          # temporary fix for https://github.com/pytorch/pytorch/issues/60746
+          if [ -z "$CIRCLE_PR_NUMBER" ]; then
+            if [[ $CIRCLE_BRANCH =~ .*pull.* ]]; then
+              export PR_NUMBER="$(echo $CIRCLE_BRANCH | sed 's/[^0-9]//g')"
+              export CIRCLE_PR_NUMBER="$PR_NUMBER"
+            fi
+          else
+            export PR_NUMBER="$CIRCLE_PR_NUMBER"
+          fi
+          ${PARALLEL_FLAGS}
+          cd workspace
+          EOL
+          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
+            echo ".jenkins/pytorch/multigpu-test.sh" >> docker_commands.sh
+          elif [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
+            echo "pip install click mock tabulate networkx==2.0" >> docker_commands.sh
+            echo "pip -q install --user \"file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx\"" >> docker_commands.sh
+            echo ".jenkins/caffe2/test.sh" >> docker_commands.sh
+          else
+            echo ".jenkins/pytorch/test.sh" >> docker_commands.sh
+          fi
+          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
+          unbuffer bash command.sh | ts
+
+          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* ]]; then
+              echo "Retrieving C++ coverage report"
+              docker cp $id:/var/lib/jenkins/workspace/build/coverage.info ./test
+          fi
+          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* || ${BUILD_ENVIRONMENT} == *"onnx"* ]]; then
+              echo "Retrieving Python coverage report"
+              docker cp $id:/var/lib/jenkins/workspace/test/.coverage ./test
+              docker cp $id:/var/lib/jenkins/workspace/test/coverage.xml ./test
+              python3 -mpip install codecov
+              python3 -mcodecov
+          fi
+    - run:
+        name: Report results
+        no_output_timeout: "5m"
+        command: |
+          set -e
+          # Retrieving test results should be done as the very first step, as the command never fails
+          # and this step is always executed, even if a previous step failed for some reason
+          echo "Retrieving test reports"
+          docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
+          docker stats --all --no-stream
+
+          cat >docker_commands.sh \<<EOL
+          # =================== The following code will be executed inside Docker container ===================
+          set -ex
+          export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}
+          export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
+          export CIRCLE_TAG="${CIRCLE_TAG:-}"
+          export CIRCLE_SHA1="$CIRCLE_SHA1"
+          export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
+          export CIRCLE_BRANCH="$CIRCLE_BRANCH"
+          export JOB_BASE_NAME="$CIRCLE_JOB"
+          export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
+          cd workspace
+          python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+          EOL
+          echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
+          unbuffer bash command.sh | ts
+        when: always
+    - store_test_results:
+        path: test-reports
+    - store_artifacts:
+        path: test/.coverage
+    - store_artifacts:
+        path: test/coverage.xml
+
+  pytorch_windows_build:
+    <<: *pytorch_windows_params
+    parameters:
+      executor:
+        type: string
+        default: "windows-xlarge-cpu-with-nvidia-cuda"
+      build_environment:
+        type: string
+        default: ""
+      test_name:
+        type: string
+        default: ""
+      cuda_version:
+        type: string
+        default: "10.1"
+      python_version:
+        type: string
+        default: "3.8"
+      vs_version:
+        type: string
+        default: "16.8.6"
+      vc_version:
+        type: string
+        default: "14.16"
+      vc_year:
+        type: string
+        default: "2019"
+      vc_product:
+        type: string
+        default: "BuildTools"
+      use_cuda:
+        type: string
+        default: ""
+    executor: <<parameters.executor>>
+    steps:
+      - checkout
+      - run:
+          name: Install VS2019 toolchain
+          no_output_timeout: 10m
+          command: |
+              powershell .circleci/scripts/vs_install.ps1
+      - run:
+          name: Install Cuda
+          no_output_timeout: 30m
+          command: |
+            if [[ "${USE_CUDA}" == "1" ]]; then
+              .circleci/scripts/windows_cuda_install.sh
+            fi
+      - run:
+          name: Install Cudnn
+          command : |
+            if [[ "${USE_CUDA}" == "1" ]]; then
+              .circleci/scripts/windows_cudnn_install.sh
+            fi
+      - run:
+          name: Build
+          no_output_timeout: "90m"
+          command: |
+            set -e
+            set +x
+            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
+            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
+            set -x
+            .jenkins/pytorch/win-build.sh
+      - persist_to_workspace:
+          root: "C:/w"
+          paths: build-results
+      - store_artifacts:
+          path: C:/w/build-results
+
+  pytorch_windows_test:
+    <<: *pytorch_windows_params
+    parameters:
+      executor:
+        type: string
+        default: "windows-medium-cpu-with-nvidia-cuda"
+      build_environment:
+        type: string
+        default: ""
+      test_name:
+        type: string
+        default: ""
+      cuda_version:
+        type: string
+        default: "10.1"
+      python_version:
+        type: string
+        default: "3.8"
+      vs_version:
+        type: string
+        default: "16.8.6"
+      vc_version:
+        type: string
+        default: "14.16"
+      vc_year:
+        type: string
+        default: "2019"
+      vc_product:
+        type: string
+        default: "BuildTools"
+      use_cuda:
+        type: string
+        default: ""
+    executor: <<parameters.executor>>
+    steps:
+      - checkout
+      - attach_workspace:
+          at: c:/users/circleci/workspace
+      - run:
+          name: Install VS2019 toolchain
+          no_output_timeout: 10m
+          command: |
+              powershell .circleci/scripts/vs_install.ps1
+      - run:
+          name: Install Cuda
+          no_output_timeout: 30m
+          command: |
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              if [[ "${CUDA_VERSION}" != "10" || "${JOB_EXECUTOR}" != "windows-with-nvidia-gpu" ]]; then
+                .circleci/scripts/windows_cuda_install.sh
+              fi
+            fi
+      - run:
+          name: Install Cudnn
+          command : |
+            if [[ "${CUDA_VERSION}" != "cpu" ]]; then
+              .circleci/scripts/windows_cudnn_install.sh
+            fi
+      - run:
+          name: Test
+          no_output_timeout: "30m"
+          command: |
+            set -e
+            export IN_CI=1
+            set +x
+            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
+            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
+            set -x
+            .jenkins/pytorch/win-test.sh
+      - run:
+          name: Report results
+          no_output_timeout: "5m"
+          command: |
+            set -ex
+            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
+            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
+            pip install typing_extensions boto3
+            python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+          when: always
+      - store_test_results:
+          path: test/test-reports
+      - store_artifacts:
+          path: test/coverage.xml
--- a/.circleci/verbatim-sources/nightly-binary-build-defaults.yml
+++ b/.circleci/verbatim-sources/nightly-binary-build-defaults.yml
@ -26,7 +26,6 @@
 # (smoke tests and upload jobs do not need the pytorch repo).
 binary_checkout: &binary_checkout
  name: Checkout pytorch/builder repo
-  no_output_timeout: "30m"
  command: .circleci/scripts/binary_checkout.sh

 # Parses circleci arguments in a consistent way, essentially routing to the
--- a/.circleci/verbatim-sources/workflows/workflows-ecr-gc.yml
+++ b/.circleci/verbatim-sources/workflows/workflows-ecr-gc.yml
@ -0,0 +1,34 @@
+  ecr_gc:
+    triggers:
+      - schedule:
+          cron: "45 * * * *"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_pytorch
+            project: pytorch
+            tags_to_keep: "271,262,256,278,282,291,300,323,327,347,389,401,402,403,405,a8006f9a-272d-4478-b137-d121c6f05c83,6e7b11da-a919-49e5-b2ba-da66e3d4bb0a,f990c76a-a798-42bb-852f-5be5006f8026,e43973a9-9d5a-4138-9181-a08a0fc55e2f,8fcf46ef-4a34-480b-a8ee-b0a30a4d3e59,9a3986fa-7ce7-4a36-a001-3c9bef9892e2,1bc00f11-e0f3-4e5c-859f-15937dd938cd,209062ef-ab58-422a-b295-36c4eed6e906,be76e8fd-44e2-484d-b090-07e0cc3a56f0,fff7795428560442086f7b2bb6004b65245dc11a,ab1632df-fa59-40e6-8c23-98e004f61148"
+            requires:
+              - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_caffe2
+            project: caffe2
+            tags_to_keep: "376,373,369,348,345,336,325,324,315,306,301,287,283,276,273,266,253,248,238,230,213"
+            requires:
+              - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_translate
+            project: translate
+            tags_to_keep: "8"
+            requires:
+              - docker_for_ecr_gc_build_job
+      - ecr_gc_job:
+            name: ecr_gc_job_for_tensorcomp
+            project: tensorcomp
+            tags_to_keep: "34"
+            requires:
+              - docker_for_ecr_gc_build_job
--- a/.circleci/verbatim-sources/workflows/workflows-scheduled-ci.yml
+++ b/.circleci/verbatim-sources/workflows/workflows-scheduled-ci.yml
@ -0,0 +1,37 @@
+  # the following clones pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7's tests but enables
+  # slow tests and sets an environment variable so gradcheck runs with fast_mode=False
+  slow-gradcheck-scheduled-ci:
+    triggers:
+      - schedule:
+          # runs every 8 hours on the 45th minute
+          cron: "45 0,8,16 * * *"
+          filters:
+            branches:
+              only:
+                - master
+    jobs:
+      - docker_build_job:
+          name: "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          image_name: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+      - pytorch_linux_build:
+          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
+          requires:
+            - "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+      - pytorch_linux_test:
+          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_old_gradcheck_test1
+          requires:
+            - periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
+          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-old-gradcheck-test1"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          use_cuda_docker_runtime: "1"
+          resource_class: gpu.medium
+      - pytorch_linux_test:
+          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_old_gradcheck_test2
+          requires:
+            - periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
+          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-old-gradcheck-test2"
+          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
+          use_cuda_docker_runtime: "1"
+          resource_class: gpu.medium
--- a/.clang-tidy
+++ b/.clang-tidy
@ -33,12 +33,11 @@ modernize-*,
 -modernize-use-default-member-init,
 -modernize-use-using,
 -modernize-use-trailing-return-type,
-modernize-use-nodiscard,
 performance-*,
 -performance-noexcept-move-constructor,
 -performance-unnecessary-value-param,
 '
-HeaderFilterRegex: 'torch/csrc/(?!deploy/interpreter/cpython).*'
+HeaderFilterRegex: 'torch/csrc/.*'
 AnalyzeTemporaryDtors: false
 WarningsAsErrors: '*'
 CheckOptions:
--- a/.flake8
+++ b/.flake8
@ -16,6 +16,7 @@ per-file-ignores = __init__.py: F401 torch/utils/cpp_extension.py: B950
 optional-ascii-coding = True
 exclude =
    ./.git,
+    ./build_code_analyzer,
    ./build_test_custom_build,
    ./build,
    ./caffe2,
--- a/.github/ISSUE_TEMPLATE/bug-report.md
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@ -0,0 +1,49 @@
+---
+name: "\U0001F41B Bug Report"
+about: Submit a bug report to help us improve PyTorch
+
+---
+
+## 🐛 Bug
+
+<!-- A clear and concise description of what the bug is. -->
+
+## To Reproduce
+
+Steps to reproduce the behavior:
+
+1.
+1.
+1.
+
+<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
+
+## Expected behavior
+
+<!-- A clear and concise description of what you expected to happen. -->
+
+## Environment
+
+Please copy and paste the output from our
+[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py)
+(or fill out the checklist below manually).
+
+You can get the script and run it with:
+```
+wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
+# For security purposes, please check the contents of collect_env.py before running it.
+python collect_env.py
+```
+
+ - PyTorch Version (e.g., 1.0):
+ - OS (e.g., Linux):
+ - How you installed PyTorch (`conda`, `pip`, source):
+ - Build command you used (if compiling from source):
+ - Python version:
+ - CUDA/cuDNN version:
+ - GPU models and configuration:
+ - Any other relevant information:
+
+## Additional context
+
+<!-- Add any other context about the problem here. -->
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@ -1,56 +0,0 @@
-name: 🐛 Bug Report
-description: Create a report to help us reproduce and fix the bug
-
-body:
- type: markdown
-  attributes:
-    value: >
-      #### Before submitting a bug, please make sure the issue hasn't already been addressed by searching through [the existing and past issues](https://github.com/pytorch/pytorch/issues?q=is%3Aissue+sort%3Acreated-desc+).
- type: textarea
-  attributes:
-    label: 🐛 Describe the bug
-    description: |
-      Please provide a clear and concise description of what the bug is.
-
-      If relevant, add a minimal example so that we can reproduce the error by running the code. It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example:
-
-      ```python
-      # All necessary imports at the beginning
-      import torch
-
-      # A succinct reproducing example trimmed down to the essential parts:
-      t = torch.rand(5, 10)  # Note: the bug is here, we should pass requires_grad=True
-      t.sum().backward()
-      ```
-
-      If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com.
-
-      Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````.
-    placeholder: |
-      A clear and concise description of what the bug is.
-
-      ```python
-      # Sample code to reproduce the problem
-      ```
-
-      ```
-      The error message you got, with the full traceback.
-      ```
-  validations:
-    required: true
- type: textarea
-  attributes:
-    label: Versions
-    description: |
-      Please run the following and paste the output below.
-      ```sh
-      wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py
-      # For security purposes, please check the contents of collect_env.py before running it.
-      python collect_env.py
-      ```
-  validations:
-    required: true
- type: markdown
-  attributes:
-    value: >
-      Thanks for contributing 🎉!
--- a/.github/ISSUE_TEMPLATE/ci-sev.md
+++ b/.github/ISSUE_TEMPLATE/ci-sev.md
@ -1,39 +0,0 @@
---
-name: "⚠️CI SEV"
-about: Tracking incidents for PyTorch's CI infra.
---
-
-> NOTE: Remember to label this issue with "`ci: sev`"
-
-## Current Status
-*Status could be: preemptive, ongoing, mitigated, closed. Also tell people if they need to take action to fix it (i.e. rebase)*.
-
-## Error looks like
-*Provide some way users can tell that this SEV is causing their issue.*
-
-## Incident timeline (all times pacific)
-*Include when the incident began, when it was detected, mitigated, root caused, and finally closed.*
-
-<details>
-<summary> Click for example </summary>
-
-e.g.
- 10/30 7:27a incident began
- 10/30 8:30a detected by <method>
- 10/30 9:00 pm root caused as…
- 10/30 9:10 pm mitigated by…
- 10/31 10: am closed by…
-
-</details>
-
-## User impact
-*How does this affect users of PyTorch CI?*
-
-## Root cause
-*What was the root cause of this issue?*
-
-## Mitigation
-*How did we mitigate the issue?*
-
-## Prevention/followups
-*How do we prevent issues like this in the future?*
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@ -1,5 +0,0 @@
-blank_issues_enabled: true
-contact_links:
-  - name: Questions
-    url: https://discuss.pytorch.org/
-    about: Ask questions and discuss with other pytorch community members
--- a/.github/ISSUE_TEMPLATE/documentation.md
+++ b/.github/ISSUE_TEMPLATE/documentation.md
@ -0,0 +1,9 @@
+---
+name: "\U0001F4DA Documentation"
+about: Report an issue related to https://pytorch.org/docs
+
+---
+
+## 📚 Documentation
+
+<!-- A clear and concise description of what content in https://pytorch.org/docs is an issue. If this has to do with the general https://pytorch.org website, please file an issue at https://github.com/pytorch/pytorch.github.io/issues/new/choose instead. If this has to do with https://pytorch.org/tutorials, please file an issue at https://github.com/pytorch/tutorials/issues/new -->
--- a/.github/ISSUE_TEMPLATE/documentation.yml
+++ b/.github/ISSUE_TEMPLATE/documentation.yml
@ -1,20 +0,0 @@
-name: 📚 Documentation
-description: Report an issue related to https://pytorch.org/docs/stable/index.html
-
-body:
- type: textarea
-  attributes:
-    label: 📚 The doc issue
-    description: >
-      A clear and concise description of what content in https://pytorch.org/docs/stable/index.html is an issue. If this has to do with the general https://pytorch.org website, please file an issue at https://github.com/pytorch/pytorch.github.io/issues/new/choose instead. If this has to do with https://pytorch.org/tutorials, please file an issue at https://github.com/pytorch/tutorials/issues/new.
-  validations:
-    required: true
- type: textarea
-  attributes:
-    label: Suggest a potential alternative/fix
-    description: >
-      Tell us how we could improve the documentation in this regard.
- type: markdown
-  attributes:
-    value: >
-      Thanks for contributing 🎉!
--- a/.github/ISSUE_TEMPLATE/feature-request.md
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@ -0,0 +1,24 @@
+---
+name: "\U0001F680 Feature Request"
+about: Submit a proposal/request for a new PyTorch feature
+
+---
+
+## 🚀 Feature
+<!-- A clear and concise description of the feature proposal -->
+
+## Motivation
+
+<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
+
+## Pitch
+
+<!-- A clear and concise description of what you want to happen. -->
+
+## Alternatives
+
+<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
+
+## Additional context
+
+<!-- Add any other context or screenshots about the feature request here. -->
--- a/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@ -1,25 +0,0 @@
-name: 🚀 Feature request
-description: Submit a proposal/request for a new pytorch feature
-
-body:
- type: textarea
-  attributes:
-    label: 🚀 The feature, motivation and pitch
-    description: >
-      A clear and concise description of the feature proposal. Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too.
-  validations:
-    required: true
- type: textarea
-  attributes:
-    label: Alternatives
-    description: >
-      A description of any alternative solutions or features you've considered, if any.
- type: textarea
-  attributes:
-    label: Additional context
-    description: >
-      Add any other context or screenshots about the feature request.
- type: markdown
-  attributes:
-    value: >
-      Thanks for contributing 🎉!
--- a/.github/ISSUE_TEMPLATE/questions-help-support.md
+++ b/.github/ISSUE_TEMPLATE/questions-help-support.md
@ -0,0 +1,13 @@
+---
+name: "❓Questions/Help/Support"
+about: Do you need support? We have resources.
+
+---
+
+## ❓ Questions and Help
+
+### Please note that this issue tracker is not a help forum and this issue will be closed.
+
+We have a set of [listed resources available on the website](https://pytorch.org/resources). Our primary means of support is our discussion forum:
+
+- [Discussion Forum](https://discuss.pytorch.org/)
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -1 +1 @@
-Fixes #ISSUE_NUMBER
+Fixes #{issue number}
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -1,9 +1,6 @@
 self-hosted-runner:
  labels:
-    - linux.large
    - linux.2xlarge
-    - linux.4xlarge
-    - linux.4xlarge.nvidia.gpu
    - linux.8xlarge.nvidia.gpu
    - linux.16xlarge.nvidia.gpu
    - windows.4xlarge
--- a/.github/generated-ciflow-ruleset.json
+++ b/.github/generated-ciflow-ruleset.json
@ -2,264 +2,100 @@
  "__comment": "@generated DO NOT EDIT MANUALLY, Generation script: .github/scripts/generate_ci_workflows.py",
  "label_rules": {
    "ciflow/all": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "docker-builds",
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit",
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.7-clang9",
-      "linux-docs",
-      "linux-docs-push",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "macos-10-15-py3-arm64",
-      "macos-10-15-py3-lite-interpreter-x86-64",
-      "macos-11-py3-x86-64",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-cuda10.2-py3.6-gcc7",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
+      "parallelnative-linux-xenial-py3.6-gcc5.4",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
+      "puretorch-linux-xenial-py3.6-gcc5.4",
      "win-vs2019-cpu-py3",
+      "win-vs2019-cuda10.2-py3",
      "win-vs2019-cuda11.3-py3"
    ],
-    "ciflow/android": [
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit"
-    ],
    "ciflow/bazel": [
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test"
+      "linux-xenial-py3.6-gcc7-bazel-test"
    ],
-    "ciflow/binaries": [
-      "linux-binary-conda",
-      "linux-binary-libtorch-cxx11-abi",
-      "linux-binary-libtorch-pre-cxx11",
-      "linux-binary-manywheel"
-    ],
-    "ciflow/binaries/conda": [
-      "linux-binary-conda"
-    ],
-    "ciflow/binaries/libtorch": [
-      "linux-binary-libtorch-cxx11-abi",
-      "linux-binary-libtorch-pre-cxx11"
-    ],
-    "ciflow/binaries/wheel": [
-      "linux-binary-manywheel"
+    "ciflow/coverage": [
+      "linux-bionic-py3.8-gcc9-coverage"
    ],
    "ciflow/cpu": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "linux-bionic-py3.7-clang9",
-      "linux-docs",
-      "linux-docs-push",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
+      "parallelnative-linux-xenial-py3.6-gcc5.4",
+      "puretorch-linux-xenial-py3.6-gcc5.4",
      "win-vs2019-cpu-py3"
    ],
    "ciflow/cuda": [
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
+      "linux-xenial-cuda10.2-py3.6-gcc7",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3",
+      "win-vs2019-cuda10.2-py3",
      "win-vs2019-cuda11.3-py3"
    ],
    "ciflow/default": [
-      "linux-bionic-py3.7-clang9",
-      "linux-docs",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
      "win-vs2019-cpu-py3",
      "win-vs2019-cuda11.3-py3"
    ],
-    "ciflow/docs": [
-      "linux-docs"
-    ],
-    "ciflow/ios": [
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit"
-    ],
    "ciflow/libtorch": [
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7"
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7"
    ],
    "ciflow/linux": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
+      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
+      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.7-clang9",
-      "linux-docs",
-      "linux-docs-push",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit"
-    ],
-    "ciflow/macos": [
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit",
-      "macos-10-15-py3-arm64",
-      "macos-10-15-py3-lite-interpreter-x86-64",
-      "macos-11-py3-x86-64"
-    ],
-    "ciflow/mobile": [
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static"
+      "linux-bionic-py3.6-clang9",
+      "linux-bionic-py3.8-gcc9-coverage",
+      "linux-xenial-cuda10.2-py3.6-gcc7",
+      "linux-xenial-cuda11.3-py3.6-gcc7",
+      "linux-xenial-py3.6-gcc5.4",
+      "linux-xenial-py3.6-gcc7-bazel-test",
+      "parallelnative-linux-xenial-py3.6-gcc5.4",
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
+      "puretorch-linux-xenial-py3.6-gcc5.4"
    ],
    "ciflow/noarch": [
-      "linux-bionic-py3.7-clang9"
-    ],
-    "ciflow/onnx": [
-      "linux-xenial-py3.7-clang7-onnx"
-    ],
-    "ciflow/sanitizers": [
-      "linux-xenial-py3.7-clang7-asan"
+      "linux-bionic-py3.6-clang9"
    ],
    "ciflow/scheduled": [
-      "linux-docs-push",
-      "periodic-libtorch-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.7-gcc7",
-      "periodic-linux-bionic-cuda11.5-py3.7-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck",
-      "periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug",
-      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3"
+      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
+      "periodic-win-vs2019-cuda11.1-py3"
    ],
    "ciflow/slow": [
      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck"
-    ],
-    "ciflow/slow-gradcheck": [
-      "periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck"
-    ],
-    "ciflow/trunk": [
-      "caffe2-linux-xenial-py3.7-gcc5.4",
-      "docker-builds",
-      "ios-12-5-1-arm64",
-      "ios-12-5-1-arm64-coreml",
-      "ios-12-5-1-arm64-custom-ops",
-      "ios-12-5-1-arm64-full-jit",
-      "ios-12-5-1-arm64-metal",
-      "ios-12-5-1-x86-64",
-      "ios-12-5-1-x86-64-coreml",
-      "ios-12-5-1-x86-64-full-jit",
-      "libtorch-linux-xenial-cuda10.2-py3.7-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.7-clang9",
-      "linux-docs",
-      "linux-vulkan-bionic-py3.7-clang9",
-      "linux-xenial-cuda11.3-py3.7-gcc7",
-      "linux-xenial-cuda11.3-py3.7-gcc7-bazel-test",
-      "linux-xenial-cuda11.3-py3.7-gcc7-no-ops",
-      "linux-xenial-py3-clang5-mobile-build",
-      "linux-xenial-py3-clang5-mobile-custom-build-static",
-      "linux-xenial-py3.7-clang7-asan",
-      "linux-xenial-py3.7-clang7-onnx",
-      "linux-xenial-py3.7-gcc5.4",
-      "linux-xenial-py3.7-gcc7",
-      "linux-xenial-py3.7-gcc7-no-ops",
-      "macos-10-15-py3-arm64",
-      "macos-10-15-py3-lite-interpreter-x86-64",
-      "macos-11-py3-x86-64",
-      "parallelnative-linux-xenial-py3.7-gcc5.4",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-build",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single",
-      "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
-      "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3"
-    ],
-    "ciflow/vulkan": [
-      "linux-vulkan-bionic-py3.7-clang9"
+      "linux-xenial-cuda10.2-py3.6-gcc7"
    ],
    "ciflow/win": [
      "periodic-win-vs2019-cuda11.1-py3",
-      "periodic-win-vs2019-cuda11.5-py3",
      "win-vs2019-cpu-py3",
+      "win-vs2019-cuda10.2-py3",
      "win-vs2019-cuda11.3-py3"
    ],
    "ciflow/xla": [
-      "linux-bionic-py3.7-clang9"
+      "linux-bionic-py3.6-clang9"
    ]
  },
  "version": "v1"
--- a/.github/scale-config.yml
+++ b/.github/scale-config.yml
@ -5,9 +5,6 @@
 #
 # NOTE (Apr, 5, 2021): Linux runners are currently all an amazonlinux2
 #
-# NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the amount of CreateInstances calls
-#                     to avoid RequestLimitExceeded issues
-#
 # TODO: Add some documentation on how the auto-scaling works
 #
 # NOTE: Default values,
@ -18,45 +15,23 @@
 #     os: linux
 #     max_available: 20
 #     disk_size: 50
-#     is_ephemeral: true

 runner_types:
-  # mainly used for ciflow-should-run, not made to run any serious tests
-  linux.large:
-    instance_type: c5.large
-    os: linux
-    disk_size: 10
-    is_ephemeral: false
  linux.2xlarge:
    instance_type: c5.2xlarge
    os: linux
    max_available: 500
    disk_size: 150
-    is_ephemeral: false
-  linux.4xlarge: # for binary-builds
-    instance_type: c5.4xlarge
-    os: linux
-    max_available: 250
-    disk_size: 150
-    is_ephemeral: false
  linux.8xlarge.nvidia.gpu:
    instance_type: g3.8xlarge
    os: linux
-    max_available: 125
+    max_available: 50
    disk_size: 150
-    is_ephemeral: false
-  linux.4xlarge.nvidia.gpu:
-    instance_type: g3.4xlarge
-    os: linux
-    max_available: 125
-    disk_size: 150
-    is_ephemeral: false
  linux.16xlarge.nvidia.gpu:
    instance_type: g3.16xlarge
    os: linux
    max_available: 10
    disk_size: 150
-    is_ephemeral: false
  windows.4xlarge:
    instance_type: c5d.4xlarge
    os: windows
@ -65,5 +40,5 @@ runner_types:
  windows.8xlarge.nvidia.gpu:
    instance_type: p3.2xlarge
    os: windows
-    max_available: 50
+    max_available: 25
    disk_size: 256
--- a/.github/scripts/ensure_actions_will_cancel.py
+++ b/.github/scripts/ensure_actions_will_cancel.py
@ -46,20 +46,11 @@ if __name__ == "__main__":
            "group": concurrency_key(filename),
            "cancel-in-progress": True,
        }
-        actual = data.get("concurrency", None)
-        if actual != expected:
+        if data.get("concurrency", None) != expected:
            print(
                f"'concurrency' incorrect or not found in '{filename.relative_to(REPO_ROOT)}'",
                file=sys.stderr,
            )
-            print(
-                f"expected: {expected}",
-                file=sys.stderr,
-            )
-            print(
-                f"actual:   {actual}",
-                file=sys.stderr,
-            )
            errors_found = True

    if errors_found:
--- a/.github/scripts/export_pytorch_labels.py
+++ b/.github/scripts/export_pytorch_labels.py
@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-'''
-Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
-
-As a part of enforcing test ownership, we want to maintain a list of existing PyTorch labels
-to verify the owners' existence. This script outputs a file containing a list of existing
-pytorch/pytorch labels so that the file could be uploaded to S3.
-
-This script assumes the correct env vars are set for AWS permissions.
-
-'''
-
-import boto3  # type: ignore[import]
-import json
-from functools import lru_cache
-from typing import List, Any
-from urllib.request import urlopen, Request
-
-# Modified from https://github.com/pytorch/pytorch/blob/b00206d4737d1f1e7a442c9f8a1cadccd272a386/torch/hub.py#L129
-def _read_url(url: Any) -> Any:
-    with urlopen(url) as r:
-        return r.headers, r.read().decode(r.headers.get_content_charset('utf-8'))
-
-
-def request_for_labels(url: str) -> Any:
-    headers = {'Accept': 'application/vnd.github.v3+json'}
-    return _read_url(Request(url, headers=headers))
-
-
-def get_last_page(header: Any) -> int:
-    # Link info looks like: <https://api.github.com/repositories/65600975/labels?per_page=100&page=2>;
-    # rel="next", <https://api.github.com/repositories/65600975/labels?per_page=100&page=3>; rel="last"
-    link_info = header['link']
-    prefix = "&page="
-    suffix = ">;"
-    return int(link_info[link_info.rindex(prefix) + len(prefix):link_info.rindex(suffix)])
-
-
-def update_labels(labels: List[str], info: str) -> None:
-    labels_json = json.loads(info)
-    labels.extend([x["name"] for x in labels_json])
-
-
-@lru_cache()
-def get_pytorch_labels() -> List[str]:
-    prefix = "https://api.github.com/repos/pytorch/pytorch/labels?per_page=100"
-    header, info = request_for_labels(prefix + "&page=1")
-    labels: List[str] = []
-    update_labels(labels, info)
-
-    last_page = get_last_page(header)
-    assert last_page > 0, "Error reading header info to determine total number of pages of labels"
-    for page_number in range(2, last_page + 1):  # skip page 1
-        _, info = request_for_labels(prefix + f"&page={page_number}")
-        update_labels(labels, info)
-
-    return labels
-
-
-def send_labels_to_S3(labels: List[str]) -> None:
-    labels_file_name = "pytorch_labels.json"
-    obj = boto3.resource('s3').Object('ossci-metrics', labels_file_name)
-    obj.put(Body=json.dumps(labels).encode())
-
-
-def main() -> None:
-    send_labels_to_S3(get_pytorch_labels())
-
-
-if __name__ == '__main__':
-    main()
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@ -10,13 +10,19 @@ architectures:
    * Latest ROCM
 """

-from typing import Dict, List, Tuple
+import argparse
+import json
+from typing import Dict, List

+CUDA_ARCHES = [
+    "10.2",
+    "11.1"
+]

-CUDA_ARCHES = ["10.2", "11.1", "11.3", "11.5"]
-
-
-ROCM_ARCHES = ["4.2", "4.3.1"]
+ROCM_ARCHES = [
+    "3.10",
+    "4.0"
+]


 def arch_type(arch_version: str) -> str:
@ -30,145 +36,132 @@ def arch_type(arch_version: str) -> str:

 WHEEL_CONTAINER_IMAGES = {
    **{
-        gpu_arch: f"pytorch/manylinux-builder:cuda{gpu_arch}"
+        # TODO: Re-do manylinux CUDA image tagging scheme to be similar to
+        #       ROCM so we don't have to do this replacement
+        gpu_arch: f"pytorch/manylinux-cuda{gpu_arch.replace('.', '')}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
-        gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}"
+        gpu_arch: f"pytorch/manylinux-rocm:{gpu_arch}"
        for gpu_arch in ROCM_ARCHES
    },
-    "cpu": "pytorch/manylinux-builder:cpu",
+    "cpu": "pytorch/manylinux-cpu"
 }

 CONDA_CONTAINER_IMAGES = {
-    **{gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}" for gpu_arch in CUDA_ARCHES},
-    "cpu": "pytorch/conda-builder:cpu",
+    **{
+        gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}"
+        for gpu_arch in CUDA_ARCHES
+    },
+    "cpu": "pytorch/conda-builder:cpu"
 }

-PRE_CXX11_ABI = "pre-cxx11"
-CXX11_ABI = "cxx11-abi"
-
-LIBTORCH_CONTAINER_IMAGES: Dict[Tuple[str, str], str] = {
+LIBTORCH_CONTAINER_IMAGES = {
    **{
-        (gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux-builder:cuda{gpu_arch}"
+        # TODO: Re-do manylinux CUDA image tagging scheme to be similar to
+        #       ROCM so we don't have to do this replacement
+        (gpu_arch, "pre-cxx11"): f"pytorch/manylinux-cuda{gpu_arch.replace('.', '')}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
-        (gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}"
+        (gpu_arch, "cxx11-abi"): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}"
        for gpu_arch in CUDA_ARCHES
    },
-    ("cpu", PRE_CXX11_ABI): "pytorch/manylinux-builder:cpu",
-    ("cpu", CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu",
+    ("cpu", "pre-cxx11"): "pytorch/manylinux-cpu",
+    ("cpu", "cxx11-abi"): "pytorch/libtorch-cxx11-builder:cpu",
 }

 FULL_PYTHON_VERSIONS = [
+    "3.6",
    "3.7",
    "3.8",
    "3.9",
 ]


-def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
-    return {
-        "cpu": "cpu",
-        "cuda": f"cu{gpu_arch_version.replace('.', '')}",
-        "rocm": f"rocm{gpu_arch_version}",
-    }.get(gpu_arch_type, gpu_arch_version)
+def is_pull_request() -> bool:
+    return False  # PR detection is stubbed out; the env-based check is kept below for reference
+    # return os.environ.get("GITHUB_HEAD_REF")


-def generate_conda_matrix() -> List[Dict[str, str]]:
-    ret: List[Dict[str, str]] = []
-    for python_version in FULL_PYTHON_VERSIONS:
+def snip_if(is_pr: bool, versions: List[str]) -> List[str]:
+    """Return only the last entry of `versions` on a PR, otherwise `versions` itself."""
+    if not is_pr:
+        return versions
+    return [versions[-1]]
+
+
+def generate_conda_matrix(is_pr: bool) -> List[Dict[str, str]]:
+    return [
+        {
+            "python_version": python_version,
+            "gpu_arch_type": arch_type(arch_version),
+            "gpu_arch_version": arch_version,
+            "container_image": CONDA_CONTAINER_IMAGES[arch_version],
+        }
+        for python_version in snip_if(is_pr, FULL_PYTHON_VERSIONS)
        # We don't currently build conda packages for rocm
-        for arch_version in ["cpu"] + CUDA_ARCHES:
-            gpu_arch_type = arch_type(arch_version)
-            gpu_arch_version = "" if arch_version == "cpu" else arch_version
-            ret.append(
-                {
-                    "python_version": python_version,
-                    "gpu_arch_type": gpu_arch_type,
-                    "gpu_arch_version": gpu_arch_version,
-                    "desired_cuda": translate_desired_cuda(
-                        gpu_arch_type, gpu_arch_version
-                    ),
-                    "container_image": CONDA_CONTAINER_IMAGES[arch_version],
-                    "package_type": "conda",
-                    "build_name": f"conda-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
-                        ".", "_"
-                    ),
-                }
-            )
-    return ret
+        for arch_version in ["cpu"] + snip_if(is_pr, CUDA_ARCHES)
+    ]


-def generate_libtorch_matrix(abi_version: str) -> List[Dict[str, str]]:
+def generate_libtorch_matrix(is_pr: bool) -> List[Dict[str, str]]:
    libtorch_variants = [
        "shared-with-deps",
        "shared-without-deps",
        "static-with-deps",
        "static-without-deps",
    ]
-    ret: List[Dict[str, str]] = []
-    for arch_version in ["cpu"] + CUDA_ARCHES:
-        for libtorch_variant in libtorch_variants:
-            # We don't currently build libtorch for rocm
-            # one of the values in the following list must be exactly
-            # CXX11_ABI, but the precise value of the other one doesn't
-            # matter
-            gpu_arch_type = arch_type(arch_version)
-            gpu_arch_version = "" if arch_version == "cpu" else arch_version
-            ret.append(
-                {
-                    "gpu_arch_type": gpu_arch_type,
-                    "gpu_arch_version": gpu_arch_version,
-                    "desired_cuda": translate_desired_cuda(
-                        gpu_arch_type, gpu_arch_version
-                    ),
-                    "libtorch_variant": libtorch_variant,
-                    "devtoolset": abi_version,
-                    "container_image": LIBTORCH_CONTAINER_IMAGES[
-                        (arch_version, abi_version)
-                    ],
-                    "package_type": "libtorch",
-                    "build_name": f"libtorch-{gpu_arch_type}{gpu_arch_version}-{libtorch_variant}-{abi_version}".replace(
-                        ".", "_"
-                    ),
-                }
-            )
-    return ret
+    return [
+        {
+            "gpu_arch_type": arch_type(arch_version),
+            "gpu_arch_version": arch_version,
+            "libtorch_variant": libtorch_variant,
+            "devtoolset": abi_version,
+            "container_image": LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)],
+        }
+        # We don't currently build libtorch for rocm
+        for arch_version in ["cpu"] + snip_if(is_pr, CUDA_ARCHES)
+        for libtorch_variant in libtorch_variants
+        # one of these must be exactly "cxx11-abi"; the other is otherwise
+        # arbitrary, but it has to match the "pre-cxx11" keys used in
+        # LIBTORCH_CONTAINER_IMAGES, which is indexed by (arch_version, abi_version)
+        for abi_version in ["cxx11-abi", "pre-cxx11"]
+    ]


-def generate_wheels_matrix() -> List[Dict[str, str]]:
-    arches = ["cpu"] + CUDA_ARCHES + ROCM_ARCHES
-    ret: List[Dict[str, str]] = []
-    for python_version in FULL_PYTHON_VERSIONS:
-        for arch_version in arches:
-            gpu_arch_type = arch_type(arch_version)
-            gpu_arch_version = "" if arch_version == "cpu" else arch_version
-            ret.append(
-                {
-                    "python_version": python_version,
-                    "gpu_arch_type": gpu_arch_type,
-                    "gpu_arch_version": gpu_arch_version,
-                    "desired_cuda": translate_desired_cuda(
-                        gpu_arch_type, gpu_arch_version
-                    ),
-                    "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
-                    "package_type": "manywheel",
-                    "build_name": f"manywheel-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
-                        ".", "_"
-                    ),
-                }
-            )
-    return ret
+def generate_wheels_matrix(is_pr: bool) -> List[Dict[str, str]]:
+    """Build one wheel entry per (python version, arch); each list is trimmed via snip_if."""
+    # Wheels cover CPU plus every CUDA and ROCm arch, in that order.
+    arch_versions = ["cpu", *snip_if(is_pr, CUDA_ARCHES), *snip_if(is_pr, ROCM_ARCHES)]
+    matrix: List[Dict[str, str]] = []
+    for py_version in snip_if(is_pr, FULL_PYTHON_VERSIONS):
+        for arch in arch_versions:
+            matrix.append({
+                "python_version": py_version,
+                "gpu_arch_type": arch_type(arch),
+                "gpu_arch_version": arch,
+                "container_image": WHEEL_CONTAINER_IMAGES[arch],
+            })
+    return matrix


-def generate_binary_build_matrix(os: str) -> List[Dict[str, str]]:
-    return {
-        "linux": [
-            *generate_conda_matrix(),
-            *generate_libtorch_matrix(abi_version=PRE_CXX11_ABI),
-            *generate_libtorch_matrix(abi_version=CXX11_ABI),
-            *generate_wheels_matrix(),
-        ]
-    }[os]
+def from_includes(includes: List[Dict[str, str]]) -> str:
+    return json.dumps(dict(include=includes))  # wrap the entries under an "include" key
+
+
+def main() -> None:
+    """Print the JSON build matrix for the mode named on the command line."""
+    generators = {
+        'conda': generate_conda_matrix,
+        'libtorch': generate_libtorch_matrix,
+        'wheels': generate_wheels_matrix,
+    }
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument('mode', choices=['conda', 'libtorch', 'wheels'])
+    mode = arg_parser.parse_args().mode
+    print(from_includes(generators[mode](is_pull_request())))
+
+
+if __name__ == "__main__":
+    main()
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
--- a/.github/scripts/generate_pytorch_test_matrix.py
+++ b/.github/scripts/generate_pytorch_test_matrix.py
@ -15,9 +15,6 @@ from typing import Dict
 from typing_extensions import TypedDict


-BUILD_ENVIRONMENT = os.getenv('BUILD_ENVIRONMENT')
-assert BUILD_ENVIRONMENT is not None
-
 class Config(TypedDict):
    num_shards: int
    runner: str
@ -34,63 +31,28 @@ def get_disabled_issues() -> str:
    issue_numbers = [x[4] for x in re.findall(regex, pr_body)]
    return ','.join(issue_numbers)

-# When the user specifies labels that are NOT ciflow/default, the expectation is
-# that the workflows should be triggered as if they are on trunk. For example, when
-# ciflow/all is specified, we should run the full test suite for Windows CUDA
-# and NOT only the smoke tests.
-def run_as_if_on_trunk() -> bool:
-    ON_PULL_REQUEST = os.getenv('GITHUB_HEAD_REF')
-    if not ON_PULL_REQUEST:
-        return True
-
-    from pathlib import Path
-    GITHUB_DIR = Path(__file__).resolve().parent.parent
-
-    with open(f'{GITHUB_DIR}/generated-ciflow-ruleset.json') as f:
-        labels_to_workflows = json.load(f)['label_rules']
-
-    pr_labels = json.loads(os.getenv('PR_LABELS', '[]'))
-    current_workflow_triggered_by_label = False
-    for label in pr_labels:
-        if label != 'ciflow/default' and label in labels_to_workflows:
-            workflows_triggered_by_label = labels_to_workflows[label]
-            if any([BUILD_ENVIRONMENT in workflow for workflow in workflows_triggered_by_label]):
-                current_workflow_triggered_by_label = True
-                break
-
-    return current_workflow_triggered_by_label

 def main() -> None:
    TEST_RUNNER_TYPE = os.getenv('TEST_RUNNER_TYPE')
    assert TEST_RUNNER_TYPE is not None
-    RUN_SMOKE_TESTS_ONLY_ON_PR = os.getenv('RUN_SMOKE_TESTS_ONLY_ON_PR')
-    RUN_SMOKE_TESTS = RUN_SMOKE_TESTS_ONLY_ON_PR == "true" and not run_as_if_on_trunk()
+    ON_PULL_REQUEST = os.getenv('GITHUB_HEAD_REF')
    NUM_TEST_SHARDS_ON_PULL_REQUEST = os.getenv('NUM_TEST_SHARDS_ON_PULL_REQUEST')
-    NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '0'))
-    if not run_as_if_on_trunk() and NUM_TEST_SHARDS_ON_PULL_REQUEST:
+    NUM_TEST_SHARDS = int(os.getenv('NUM_TEST_SHARDS', '1'))
+    if ON_PULL_REQUEST and NUM_TEST_SHARDS_ON_PULL_REQUEST:
        NUM_TEST_SHARDS = int(NUM_TEST_SHARDS_ON_PULL_REQUEST)
    MULTIGPU_RUNNER_TYPE = os.getenv('MULTIGPU_RUNNER_TYPE')
-    DISTRIBUTED_GPU_RUNNER_TYPE = os.getenv('DISTRIBUTED_GPU_RUNNER_TYPE', TEST_RUNNER_TYPE)
    NOGPU_RUNNER_TYPE = os.getenv('NOGPU_RUNNER_TYPE')
    configs: Dict[str, Config] = {}
    if os.getenv('ENABLE_JIT_LEGACY_TEST'):
        configs['jit_legacy'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if MULTIGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_MULTIGPU_TEST'):
        configs['multigpu'] = {'num_shards': 1, 'runner': MULTIGPU_RUNNER_TYPE}
-    if NOGPU_RUNNER_TYPE is not None:
-        if os.getenv('ENABLE_NOGPU_NO_AVX_TEST'):
-            configs['nogpu_NO_AVX'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
-        if os.getenv('ENABLE_NOGPU_NO_AVX2_TEST'):
-            configs['nogpu_NO_AVX2'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
-        if os.getenv('ENABLE_FORCE_ON_CPU_TEST'):
-            configs['force_on_cpu'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
+    if NOGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_NOGPU_NO_AVX_TEST'):
+        configs['nogpu_NO_AVX'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
+    if NOGPU_RUNNER_TYPE is not None and os.getenv('ENABLE_NOGPU_NO_AVX2_TEST'):
+        configs['nogpu_NO_AVX2'] = {'num_shards': 1, 'runner': NOGPU_RUNNER_TYPE}
    if os.getenv('ENABLE_DISTRIBUTED_TEST'):
-        configs['distributed'] = {
-            'num_shards': 1,
-            'runner': DISTRIBUTED_GPU_RUNNER_TYPE if "cuda" in str(BUILD_ENVIRONMENT) else TEST_RUNNER_TYPE
-        }
-    if os.getenv('ENABLE_FX2TRT_TEST'):
-        configs['fx2trt'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
+        configs['distributed'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if os.getenv('ENABLE_SLOW_TEST'):
        configs['slow'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if os.getenv('ENABLE_DOCS_TEST'):
@ -101,8 +63,6 @@ def main() -> None:
        configs['xla'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    if os.getenv('ENABLE_NOARCH_TEST'):
        configs['noarch'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
-    if RUN_SMOKE_TESTS:
-        configs['smoke_tests'] = {'num_shards': 1, 'runner': TEST_RUNNER_TYPE}
    matrix = {
        'include': [
            {
--- a/.github/scripts/install_nvidia_utils_linux.sh
+++ b/.github/scripts/install_nvidia_utils_linux.sh
@ -3,7 +3,7 @@
 set -eou pipefail

 DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID) \
-DRIVER_FN="NVIDIA-Linux-x86_64-495.44.run"
+DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
 YUM_REPO_URL="https://nvidia.github.io/nvidia-docker/${DISTRIBUTION}/nvidia-docker.repo"

 install_nvidia_docker2_amzn2() {
--- a/.github/scripts/lint_test_ownership.py
+++ b/.github/scripts/lint_test_ownership.py
@ -1,88 +0,0 @@
-#!/usr/bin/env python3
-'''
-Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
-
-This lint verifies that every Python test file (file that matches test_*.py or *_test.py in the test folder)
-has valid ownership information in a comment header. Valid means:
-  - The format of the header follows the pattern "# Owner(s): ["list", "of owner", "labels"]
-  - Each owner label actually exists in PyTorch
-  - Each owner label starts with "module: " or "oncall: " or is in ACCEPTABLE_OWNER_LABELS
-
-This file is expected to run in the root directory of pytorch/pytorch.
-'''
-import boto3  # type: ignore[import]
-import botocore  # type: ignore[import]
-import fnmatch
-import json
-import sys
-from pathlib import Path
-from typing import List, Any
-
-
-# Team/owner labels usually start with "module: " or "oncall: ", but the following are acceptable exceptions
-ACCEPTABLE_OWNER_LABELS = ["NNC", "high priority"]
-GLOB_EXCEPTIONS = [
-    "test/run_test.py"
-]
-
-PYTORCH_ROOT = Path(__file__).resolve().parent.parent.parent
-TEST_DIR = PYTORCH_ROOT / "test"
-CURRENT_FILE_NAME = Path(__file__).resolve().relative_to(PYTORCH_ROOT)
-
-S3_RESOURCE_READ_ONLY = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
-
-
-def get_all_test_files() -> List[Path]:
-    test_files = list(TEST_DIR.glob("**/test_*.py"))
-    test_files.extend(list(TEST_DIR.glob("**/*_test.py")))
-    return [f for f in test_files if any([fnmatch.fnmatch(str(f), g) for g in GLOB_EXCEPTIONS])]
-
-
-def get_pytorch_labels() -> Any:
-    bucket = S3_RESOURCE_READ_ONLY.Bucket("ossci-metrics")
-    summaries = bucket.objects.filter(Prefix="pytorch_labels.json")
-    for summary in summaries:
-        labels = summary.get()["Body"].read()
-    return json.loads(labels)
-
-
-# Returns a string denoting the error invalidating the label OR an empty string if nothing is wrong
-def validate_label(label: str, pytorch_labels: List[str]) -> str:
-    if label not in pytorch_labels:
-        return f"{label} is not a PyTorch label (please choose from https://github.com/pytorch/pytorch/labels)"
-    if label.startswith("module:") or label.startswith("oncall:") or label in ACCEPTABLE_OWNER_LABELS:
-        return ""
-    return f"{label} is not an acceptable owner (please update to another label or edit ACCEPTABLE_OWNERS_LABELS " \
-        "in {CURRENT_FILE_NAME}"
-
-
-# Returns a string denoting the error invalidating the file OR an empty string if nothing is wrong
-def validate_file(filename: Path, pytorch_labels: List[str]) -> str:
-    prefix = "# Owner(s): "
-    relative_name = Path(filename).relative_to(PYTORCH_ROOT)
-    with open(filename) as f:
-        for line in f.readlines():
-            if line.startswith(prefix):
-                labels = json.loads(line[len(prefix):])
-                labels_msgs = [validate_label(label, pytorch_labels) for label in labels]
-                file_msg = ", ".join([x for x in labels_msgs if x != ""])
-                return f"{relative_name}: {file_msg}" if file_msg != "" else ""
-    return f"{relative_name}: missing a comment header with ownership information."
-
-
-def main() -> None:
-    test_file_paths = get_all_test_files()
-    pytorch_labels = get_pytorch_labels()
-
-    file_msgs = [validate_file(f, pytorch_labels) for f in test_file_paths]
-    err_msg = "\n".join([x for x in file_msgs if x != ""])
-    if err_msg != "":
-        err_msg = err_msg + "\n\nIf you see files with missing ownership information above, " \
-            "please add the following line\n\n# Owner(s): [\"<owner: label>\"]\n\nto the top of each test file. " \
-            "The owner should be an existing pytorch/pytorch label."
-        print(err_msg)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
--- a/.github/scripts/run_torchbench.py
+++ b/.github/scripts/run_torchbench.py
@ -20,6 +20,8 @@ import subprocess

 from typing import List

+CUDA_VERSION = "cu102"
+PYTHON_VERSION = "3.7"
 TORCHBENCH_CONFIG_NAME = "config.yaml"
 MAGIC_PREFIX = "RUN_TORCHBENCH:"
 MAGIC_TORCHBENCH_PREFIX = "TORCHBENCH_BRANCH:"
@ -43,17 +45,6 @@ def gen_abtest_config(control: str, treatment: str, models: List[str]) -> str:
    config = config + "\n"
    return config

-def setup_gha_env(name: str, val: str) -> None:
-    fname = os.environ["GITHUB_ENV"]
-    content = f"{name}={val}\n"
-    with open(fname, "a") as fo:
-        fo.write(content)
-
-def find_current_branch(repo_path: str) -> str:
-    repo = git.Repo(repo_path)
-    name: str = repo.active_branch.name
-    return name
-
 def deploy_torchbench_config(output_dir: str, config: str) -> None:
    # Create test dir if needed
    pathlib.Path(output_dir).mkdir(exist_ok=True)
@ -82,18 +73,25 @@ def extract_models_from_pr(torchbench_path: str, prbody_file: str) -> List[str]:
            return []
    return model_list

-def find_torchbench_branch(prbody_file: str) -> str:
-    branch_name: str = ""
+def identify_torchbench_branch(torchbench_path: str, prbody_file: str) -> None:
+    branch_name: str = ""  # must be bound: stays empty when the PR body names no branch
    with open(prbody_file, "r") as pf:
        lines = map(lambda x: x.strip(), pf.read().splitlines())
        magic_lines = list(filter(lambda x: x.startswith(MAGIC_TORCHBENCH_PREFIX), lines))
        if magic_lines:
            # Only the first magic line will be recognized.
            branch_name = magic_lines[0][len(MAGIC_TORCHBENCH_PREFIX):].strip()
-    # If not specified, use main as the default branch
+    # If not specified, directly return without the branch checkout
    if not branch_name:
-        branch_name = "main"
-    return branch_name
+        return
+    try:
+        print(f"Checking out the TorchBench branch: {branch_name} ...")
+        repo = git.Repo(torchbench_path)
+        origin = repo.remotes.origin
+        origin.fetch(branch_name)
+        repo.create_head(branch_name, origin.refs[branch_name]).checkout()
+    except git.exc.GitCommandError as err:
+        raise RuntimeError(f'{branch_name} doesn\'t exist in the pytorch/benchmark repository. Please double check.') from err

 def run_torchbench(pytorch_path: str, torchbench_path: str, output_dir: str) -> None:
    # Copy system environment so that we will not override
@ -106,41 +104,28 @@ def run_torchbench(pytorch_path: str, torchbench_path: str, output_dir: str) ->

 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run TorchBench tests based on PR')
+    parser.add_argument('--pr-num', required=True, type=str, help="The Pull Request number")
+    parser.add_argument('--pr-base-sha', required=True, type=str, help="The Pull Request base hash")
+    parser.add_argument('--pr-head-sha', required=True, type=str, help="The Pull Request head hash")
    parser.add_argument('--pr-body', required=True, help="The file that contains body of a Pull Request")
-
-    subparsers = parser.add_subparsers(dest='command')
-    # parser for setup the torchbench branch name env
-    branch_parser = subparsers.add_parser("set-torchbench-branch")
-    # parser to run the torchbench branch
-    run_parser = subparsers.add_parser("run")
-    run_parser.add_argument('--pr-num', required=True, type=str, help="The Pull Request number")
-    run_parser.add_argument('--pr-base-sha', required=True, type=str, help="The Pull Request base hash")
-    run_parser.add_argument('--pr-head-sha', required=True, type=str, help="The Pull Request head hash")
-    run_parser.add_argument('--pytorch-path', required=True, type=str, help="Path to pytorch repository")
-    run_parser.add_argument('--torchbench-path', required=True, type=str, help="Path to TorchBench repository")
+    parser.add_argument('--pytorch-path', required=True, type=str, help="Path to pytorch repository")
+    parser.add_argument('--torchbench-path', required=True, type=str, help="Path to TorchBench repository")
    args = parser.parse_args()

-    if args.command == 'set-torchbench-branch':
-        branch_name = find_torchbench_branch(args.pr_body)
-        # env name: "TORCHBENCH_BRANCH"
-        setup_gha_env(MAGIC_TORCHBENCH_PREFIX[:-1], branch_name)
-    elif args.command == 'run':
-        output_dir: str = os.path.join(os.environ["HOME"], ".torchbench", "bisection", f"pr{args.pr_num}")
-        # Identify the specified models and verify the input
-        models = extract_models_from_pr(args.torchbench_path, args.pr_body)
-        if not models:
-            print("Can't parse the model filter from the pr body. Currently we only support allow-list.")
-            exit(-1)
-        # Assert the current branch in args.torchbench_path is the same as the one specified in pr body
-        branch_name = find_torchbench_branch(args.pr_body)
-        current_branch = find_current_branch(args.torchbench_path)
-        assert branch_name == current_branch, f"Torchbench repo {args.torchbench_path} is on branch {current_branch}, \
-                                                but user specified to run on branch {branch_name}."
-        print(f"Ready to run TorchBench with benchmark. Result will be saved in the directory: {output_dir}.")
-        # Run TorchBench with the generated config
-        torchbench_config = gen_abtest_config(args.pr_base_sha, args.pr_head_sha, models)
-        deploy_torchbench_config(output_dir, torchbench_config)
-        run_torchbench(pytorch_path=args.pytorch_path, torchbench_path=args.torchbench_path, output_dir=output_dir)
-    else:
-        print(f"The command {args.command} is not supported.")
-        exit(-1)
+    output_dir: str = os.path.join(os.environ["HOME"], ".torchbench", "bisection", f"pr{args.pr_num}")
+    # Identify the specified models and verify the input
+    models = extract_models_from_pr(args.torchbench_path, args.pr_body)
+    if not models:
+        print("Can't parse the model filter from the pr body. Currently we only support allow-list.")
+        exit(1)
+    # Identify the specified TorchBench branch, verify the branch exists, and checkout the branch
+    try:
+        identify_torchbench_branch(args.torchbench_path, args.pr_body)
+    except RuntimeError as e:
+        print(f"Identify TorchBench branch failed: {str(e)}")
+        exit(1)
+    print(f"Ready to run TorchBench with benchmark. Result will be saved in the directory: {output_dir}.")
+    # Run TorchBench with the generated config
+    torchbench_config = gen_abtest_config(args.pr_base_sha, args.pr_head_sha, models)
+    deploy_torchbench_config(output_dir, torchbench_config)
+    run_torchbench(pytorch_path=args.pytorch_path, torchbench_path=args.torchbench_path, output_dir=output_dir)
--- a/.github/scripts/syncbranches.py
+++ b/.github/scripts/syncbranches.py
@ -1,186 +0,0 @@
-#!/usr/bin/env python3
-
-from collections import defaultdict
-from datetime import datetime
-from typing import cast, Any, Dict, List, Optional, Tuple, Union
-import os
-
-
-def _check_output(items: List[str], encoding: str = "utf-8") -> str:
-    from subprocess import check_output
-    return check_output(items).decode(encoding)
-
-
-def fuzzy_list_to_dict(items: List[Tuple[str, str]]) -> Dict[str, List[str]]:
-    """
-    Converts list to dict preserving elements with duplicate keys
-    """
-    rc: Dict[str, List[str]] = defaultdict(lambda: [])
-    for (key, val) in items:
-        rc[key].append(val)
-    return dict(rc)
-
-
-class GitCommit:
-    commit_hash: str
-    title: str
-    body: str
-    author: str
-    author_date: datetime
-    commit_date: Optional[datetime]
-
-    def __init__(self,
-                 commit_hash: str,
-                 author: str,
-                 author_date: datetime,
-                 title: str,
-                 body: str,
-                 commit_date: Optional[datetime] = None) -> None:
-        self.commit_hash = commit_hash
-        self.author = author
-        self.author_date = author_date
-        self.commit_date = commit_date
-        self.title = title
-        self.body = body
-
-    def __repr__(self) -> str:
-        return f"{self.title} ({self.commit_hash})"
-
-    def __contains__(self, item: Any) -> bool:
-        return item in self.body or item in self.title
-
-
-def parse_fuller_format(lines: Union[str, List[str]]) -> GitCommit:
-    """
-    Expect commit message generated using `--format=fuller --date=unix` format, i.e.:
-        commit <sha1>
-        Author:     <author>
-        AuthorDate: <author date>
-        Commit:     <committer>
-        CommitDate: <committer date>
-
-        <title line>
-
-        <full commit message>
-
-    """
-    if isinstance(lines, str):
-        lines = lines.split("\n")
-    # TODO: Handle merge commits correctly
-    if len(lines) > 1 and lines[1].startswith("Merge:"):
-        del lines[1]
-    assert len(lines) > 7
-    assert lines[0].startswith("commit")
-    assert lines[1].startswith("Author: ")
-    assert lines[2].startswith("AuthorDate: ")
-    assert lines[3].startswith("Commit: ")
-    assert lines[4].startswith("CommitDate: ")
-    assert len(lines[5]) == 0
-    return GitCommit(commit_hash=lines[0].split()[1].strip(),
-                     author=lines[1].split(":", 1)[1].strip(),
-                     author_date=datetime.fromtimestamp(int(lines[2].split(":", 1)[1].strip())),
-                     commit_date=datetime.fromtimestamp(int(lines[4].split(":", 1)[1].strip())),
-                     title=lines[6].strip(),
-                     body="\n".join(lines[7:]),
-                     )
-
-
-class GitRepo:
-    def __init__(self, path: str, remote: str = "origin") -> None:
-        self.repo_dir = path
-        self.remote = remote
-
-    def _run_git(self, *args: Any) -> str:
-        return _check_output(["git", "-C", self.repo_dir] + list(args))
-
-    def revlist(self, revision_range: str) -> List[str]:
-        rc = self._run_git("rev-list", revision_range, "--", ".").strip()
-        return rc.split("\n") if len(rc) > 0 else []
-
-    def current_branch(self) -> str:
-        return self._run_git("symbolic-ref", "--short", "HEAD").strip()
-
-    def checkout(self, branch: str) -> None:
-        self._run_git('checkout', branch)
-
-    def show_ref(self, name: str) -> str:
-        refs = self._run_git('show-ref', '-s', name).strip().split('\n')
-        if not all(refs[i] == refs[0] for i in range(1, len(refs))):
-            raise RuntimeError(f"referce {name} is ambigous")
-        return refs[0]
-
-    def rev_parse(self, name: str) -> str:
-        return self._run_git('rev-parse', '--verify', name).strip()
-
-    def get_merge_base(self, from_ref: str, to_ref: str) -> str:
-        return self._run_git('merge-base', from_ref, to_ref).strip()
-
-    def patch_id(self, ref: Union[str, List[str]]) -> List[Tuple[str, str]]:
-        is_list = isinstance(ref, list)
-        if is_list:
-            if len(ref) == 0:
-                return []
-            ref = " ".join(ref)
-        rc = _check_output(['sh', '-c', f'git -C {self.repo_dir} show {ref}|git patch-id --stable']).strip()
-        return [cast(Tuple[str, str], x.split(" ", 1)) for x in rc.split("\n")]
-
-    def get_commit(self, ref: str) -> GitCommit:
-        return parse_fuller_format(self._run_git('show', '--format=fuller', '--date=unix', '--shortstat', ref))
-
-    def cherry_pick(self, ref: str) -> None:
-        self._run_git('cherry-pick', '-x', ref)
-
-    def compute_branch_diffs(self, from_branch: str, to_branch: str) -> Tuple[List[str], List[str]]:
-        """
-        Returns list of commmits that are missing in each other branch since their merge base
-        Might be slow if merge base is between two branches is pretty far off
-        """
-        from_ref = self.rev_parse(from_branch)
-        to_ref = self.rev_parse(to_branch)
-        merge_base = self.get_merge_base(from_ref, to_ref)
-        from_commits = self.revlist(f'{merge_base}..{from_ref}')
-        to_commits = self.revlist(f'{merge_base}..{to_ref}')
-        from_ids = fuzzy_list_to_dict(self.patch_id(from_commits))
-        to_ids = fuzzy_list_to_dict(self.patch_id(to_commits))
-        for patch_id in set(from_ids).intersection(set(to_ids)):
-            from_values = from_ids[patch_id]
-            to_values = to_ids[patch_id]
-            if len(from_values) != len(to_values):
-                # Eliminate duplicate commits+reverts from the list
-                while len(from_values) > 0 and len(to_values) > 0:
-                    frc = self.get_commit(from_values.pop())
-                    toc = self.get_commit(to_values.pop())
-                    if frc.title != toc.title or frc.author_date != toc.author_date:
-                        raise RuntimeError(f"Unexpected differences between {frc} and {toc}")
-                    from_commits.remove(frc.commit_hash)
-                    to_commits.remove(toc.commit_hash)
-                continue
-            for commit in from_values:
-                from_commits.remove(commit)
-            for commit in to_values:
-                to_commits.remove(commit)
-        return (from_commits, to_commits)
-
-    def cherry_pick_commits(self, from_branch: str, to_branch: str) -> None:
-        orig_branch = self.current_branch()
-        self.checkout(to_branch)
-        from_commits, to_commits = self.compute_branch_diffs(from_branch, to_branch)
-        if len(from_commits) == 0:
-            print("Nothing to do")
-            self.checkout(orig_branch)
-            return
-        for commit in reversed(from_commits):
-            self.cherry_pick(commit)
-        self.checkout(orig_branch)
-
-    def push(self, branch: str) -> None:
-        self._run_git("push", self.remote, branch)
-
-
-if __name__ == '__main__':
-    repo_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
-    default_branch = 'master'
-    sync_branch = 'fbsync'
-    repo = GitRepo(repo_dir)
-    repo.cherry_pick_commits(sync_branch, default_branch)
-    repo.push(default_branch)
--- a/.github/templates/android_ci_full_workflow.yml.j2
+++ b/.github/templates/android_ci_full_workflow.yml.j2
@ -1,158 +0,0 @@
-{%- extends "linux_ci_workflow.yml.j2" -%}
-{% import 'common_android.yml.j2' as common_android %}
-{%- set exclude_test = true -%}
-{% block name -%}
-# Template is at:    .github/templates/android_ci_full_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
-
-{% block build +%}
-  # building and testing in a single job since bazel runs only small subset of tests
-  build-and-test:
-    runs-on: !{{ test_runner_type }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build-and-test
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == '!{{ ciflow_config.trigger_action }}') && (github.event.assigneed.login == '!{{ ciflow_config.trigger_actor }}') }}
-      LABEL_CONDITIONS: ${{ !{{ ciflow_config.label_conditions }} }}
-    if: !{{ ciflow_config.root_job_condition }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout_pytorch("recursive") }}
-      !{{ common.calculate_docker_image(false) }}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Determine shm-size
-        run: |
-          shm_size="1g"
-          case "${BUILD_ENVIRONMENT}" in
-            *cuda*)
-              shm_size="2g"
-              ;;
-            *rocm*)
-              shm_size="8g"
-              ;;
-          esac
-          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
-      - name: Output disk space left
-        run: |
-          sudo df -H
-      - name: Preserve github env variables for use in docker
-        run: |
-          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
-      !{{ common.parse_ref() }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a-build", "arm-v7a") }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a-build", "arm-v8a") }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32-build", "x86_32") }}
-      !{{ common_android.build_android("pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64-build", "x86_64") }}
-      - name: Build-Final-Artifcact
-        env:
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        run: |
-          set -eux
-
-          docker_image_libtorch_android_x86_32="${DOCKER_IMAGE}-x86_32"
-          docker_image_libtorch_android_x86_64="${DOCKER_IMAGE}-x86_64"
-          docker_image_libtorch_android_arm_v7a="${DOCKER_IMAGE}-arm-v7a"
-          docker_image_libtorch_android_arm_v8a="${DOCKER_IMAGE}-arm-v8a"
-
-          echo "docker_image_commit: ${DOCKER_IMAGE}"
-          echo "docker_image_libtorch_android_x86_32: ${docker_image_libtorch_android_x86_32}"
-          echo "docker_image_libtorch_android_x86_64: ${docker_image_libtorch_android_x86_64}"
-          echo "docker_image_libtorch_android_arm_v7a: ${docker_image_libtorch_android_arm_v7a}"
-          echo "docker_image_libtorch_android_arm_v8a: ${docker_image_libtorch_android_arm_v8a}"
-
-          # x86_32
-          time docker pull "${docker_image_libtorch_android_x86_32}" >/dev/null
-          export id_x86_32
-          id_x86_32=$(docker run -e GRADLE_OFFLINE=1 --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_x86_32}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "${id_x86_32}" bash) 2>&1
-
-          # arm-v7a
-          time docker pull "${docker_image_libtorch_android_arm_v7a}" >/dev/null
-          export id_arm_v7a
-          id_arm_v7a=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_arm_v7a}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "${id_arm_v7a}" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_install_arm_v7a"
-          docker cp "${id_arm_v7a}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_arm_v7a"
-
-          # x86_64
-          time docker pull "${docker_image_libtorch_android_x86_64}" >/dev/null
-          export id_x86_64
-          id_x86_64=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_x86_64}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "${id_x86_64}" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_install_x86_64"
-          docker cp "${id_x86_64}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_x86_64"
-
-          # arm-v8a
-          time docker pull "${docker_image_libtorch_android_arm_v8a}" >/dev/null
-          export id_arm_v8a
-          id_arm_v8a=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins "${docker_image_libtorch_android_arm_v8a}")
-
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace") | docker exec -u jenkins -i "$id_arm_v8a" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_install_arm_v8a"
-          docker cp "${id_arm_v8a}:/var/lib/jenkins/workspace/build_android/install" "${GITHUB_WORKSPACE}/build_android_install_arm_v8a"
-
-          # Putting everything together
-          docker cp "${GITHUB_WORKSPACE}/build_android_install_arm_v7a" "${id_x86_32}:/var/lib/jenkins/workspace/build_android_install_arm_v7a"
-          docker cp "${GITHUB_WORKSPACE}/build_android_install_x86_64" "${id_x86_32}:/var/lib/jenkins/workspace/build_android_install_x86_64"
-          docker cp "${GITHUB_WORKSPACE}/build_android_install_arm_v8a" "${id_x86_32}:/var/lib/jenkins/workspace/build_android_install_arm_v8a"
-
-          # run gradle buildRelease
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec \
-            -e BUILD_ENVIRONMENT="pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build" \
-            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
-            -e IS_GHA \
-            -e PR_NUMBER \
-            -e SHA1 \
-            -e BRANCH \
-            -e GITHUB_RUN_ID \
-            -e SCCACHE_BUCKET \
-            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-            -e SKIP_SCCACHE_INITIALIZATION=1 \
-            -e TORCH_CUDA_ARCH_LIST \
-            -e PR_LABELS \
-            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --user jenkins \
-            -u jenkins -i "${id_x86_32}" bash) 2>&1
-
-          mkdir -p "${GITHUB_WORKSPACE}/build_android_artifacts"
-          docker cp "${id_x86_32}:/var/lib/jenkins/workspace/android/artifacts.tgz" "${GITHUB_WORKSPACE}/build_android_artifacts/"
-
-          output_image="${DOCKER_IMAGE}-android-x86_32-gradle"
-          docker commit "${id_x86_32}" "${output_image}"
-          time docker push "${output_image}"
-      !{{ common_android.upload_androind_binary_size("prebuilt", "${GITHUB_WORKSPACE}/build_android_artifacts/artifacts.tgz") }}
-      - uses: !{{ common.upload_artifact_s3_action }}
-        name: Store PyTorch Android Build Artifacts on S3
-        with:
-          name: ${{ env.BUILD_ENVIRONMENT }}
-          retention-days: 14
-          if-no-files-found: error
-          path:
-            build_android_artifacts/artifacts.tgz
-      !{{ common.teardown_ec2_linux() }}
-{%- endblock %}
--- a/.github/templates/android_ci_workflow.yml.j2
+++ b/.github/templates/android_ci_workflow.yml.j2
@ -1,104 +0,0 @@
-{%- extends "linux_ci_workflow.yml.j2" -%}
-{% import 'common_android.yml.j2' as common_android %}
-{%- set exclude_test = true -%}
-{% block name -%}
-# Template is at:    .github/templates/android_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
-
-{% block build +%}
-  # building and testing in a single job since bazel runs only small subset of tests
-  build-and-test:
-    runs-on: !{{ test_runner_type }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build-and-test
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == '!{{ ciflow_config.trigger_action }}') && (github.event.assigneed.login == '!{{ ciflow_config.trigger_actor }}') }}
-      LABEL_CONDITIONS: ${{ !{{ ciflow_config.label_conditions }} }}
-    if: !{{ ciflow_config.root_job_condition }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout_pytorch("recursive") }}
-      !{{ common.calculate_docker_image(false) }}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Determine shm-size
-        run: |
-          shm_size="1g"
-          case "${BUILD_ENVIRONMENT}" in
-            *cuda*)
-              shm_size="2g"
-              ;;
-            *rocm*)
-              shm_size="8g"
-              ;;
-          esac
-          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
-      - name: Output disk space left
-        run: |
-          sudo df -H
-      - name: Preserve github env variables for use in docker
-        run: |
-          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
-      - name: Build
-        run: |
-          set -e
-          # Unlike other gradle jobs, it's not worth building libtorch in a separate CI job and share via docker, because:
-          # 1) Not shareable: it's custom selective build, which is different from default libtorch mobile build;
-          # 2) Not parallelizable by architecture: it only builds libtorch for one architecture;
-
-          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}"
-          time docker pull "${DOCKER_IMAGE}" >/dev/null
-
-          export BUILD_LITE_INTERPRETER
-          BUILD_LITE_INTERPRETER="1"
-          if [[ "${BUILD_ENVIRONMENT}" == *"full-jit" ]]; then
-            BUILD_LITE_INTERPRETER="0"
-          fi
-
-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
-          # shellcheck disable=SC2016
-          export id
-          id=$(docker run -e BUILD_ENVIRONMENT \
-            -e JOB_BASE_NAME \
-            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e SCCACHE_BUCKET \
-            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-            -e PR_LABELS \
-            -e SKIP_SCCACHE_INITIALIZATION=1 \
-            -e TORCH_CUDA_ARCH_LIST \
-            -e BUILD_LITE_INTERPRETER \
-            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --security-opt seccomp=unconfined \
-            --cap-add=SYS_PTRACE \
-            --tty \
-            --detach \
-            --user jenkins \
-            -v "$(pwd):/var/lib/jenkins/workspace" \
-            --cap-add=SYS_PTRACE \
-            --security-opt seccomp=unconfined \
-            --cap-add=SYS_PTRACE \
-            --security-opt seccomp=unconfined \
-            -t -d -w /var/lib/jenkins "${DOCKER_IMAGE}")
-
-          # shellcheck disable=SC2016
-          export COMMAND
-          # shellcheck disable=SC2016
-          COMMAND='((echo "export GRADLE_OFFLINE=1" && echo "export BUILD_LITE_INTERPRETER=${BUILD_LITE_INTERPRETER}" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
-          echo "${COMMAND}" > ./command.sh && bash ./command.sh
-          # Skip docker push as this job is purely for size analysis purpose.
-          # Result binaries are already in `/home/circleci/project/` as it's mounted instead of copied.
-      !{{ common.parse_ref() }}
-      !{{ common_android.upload_androind_binary_size("custom-build-single", "") }}
-      !{{ common.teardown_ec2_linux() }}
-{%- endblock %}
--- a/.github/templates/bazel_ci_workflow.yml.j2
+++ b/.github/templates/bazel_ci_workflow.yml.j2
@ -1,5 +1,4 @@
 {%- extends "linux_ci_workflow.yml.j2" -%}
-{% import 'common_android.yml.j2' as common_android %}
 {%- set exclude_test = true -%}
 {% block name -%}
 # Template is at:    .github/templates/bazel_ci_workflow.yml.j2
@ -8,29 +7,35 @@ name: !{{ build_environment }}
 {%- endblock %}

 on:
+{%- if on_pull_request %}
  pull_request:
+  {%- if ciflow_config.enabled %}
+    {%- if ciflow_config.trigger_action_only %}
+    types: [!{{ ciflow_config.trigger_action }}]
+    {%- else %}
    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
+    {%- endif %}
+  {%- endif %}
+{%- else %}
+  # TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
+{%- endif %}

 {% block build +%}
  # building and testing in a single job since bazel runs only small subset of tests
  build-and-test:
    runs-on: !{{ test_runner_type }}
+    needs: [calculate-docker-image, !{{ ciflow_config.root_job_name }}]
    env:
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
      JOB_BASE_NAME: !{{ build_environment }}-build-and-test
      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == '!{{ ciflow_config.trigger_action }}') && (github.event.assigneed.login == '!{{ ciflow_config.trigger_actor }}') }}
-      LABEL_CONDITIONS: ${{ !{{ ciflow_config.label_conditions }} }}
-    if: !{{ ciflow_config.root_job_condition }}
+      CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
      !{{ common.setup_ec2_linux() }}
      !{{ common.checkout_pytorch("recursive") }}
-      !{{ common.calculate_docker_image(false) }}
-      - name: Pull Docker image
+      - name: Pull docker image
        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
+          docker pull "${DOCKER_IMAGE}"
      - name: Determine shm-size
        run: |
          shm_size="1g"
@ -74,10 +79,23 @@ on:
          )
          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && sudo chown -R jenkins /dev && .jenkins/pytorch/build.sh'
      !{{ common.parse_ref() }}
-      !{{ common_android.upload_androind_binary_size("", "")}}
+      - name: Display and upload binary build size statistics (Click Me)
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
+        run: |
+          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
+          export COMMIT_TIME
+          pip3 install requests==2.26
+          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
      - name: Test
-        # Time out the test phase after 3.5 hours
-        timeout-minutes: 210
        run: |
          # detached container should get cleaned up by teardown_ec2_linux
          export SHARD_NUMBER=0
@ -90,10 +108,10 @@ on:
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e SHARD_NUMBER \
-            -e NUM_TEST_SHARDS \
            -e JOB_BASE_NAME \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
+            -e CONTINUE_THROUGH_ERROR \
            -e PR_LABELS \
            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
@ -114,7 +132,6 @@ on:
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      !{{ common.upload_test_reports(name='bazel') }}
-      !{{ common.upload_downloaded_files(name='bazel') }}
      !{{ common.upload_test_statistics(build_environment) }}
      !{{ common.teardown_ec2_linux() }}
 {%- endblock %}
--- a/.github/templates/common.yml.j2
+++ b/.github/templates/common.yml.j2
@ -4,7 +4,6 @@
 {%- set squid_proxy    = "http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -%}
 {# squid_no_proxy is a list of common set of fixed domains or IPs that we don't need to proxy. See https://docs.aws.amazon.com/AmazonECS/latest/developerguide/http_proxy_config.html#windows-proxy #}
 {%- set squid_no_proxy = "localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" -%}
-{%- set timeout_minutes = 240 -%}

 {%- macro concurrency(build_environment) -%}
 concurrency:
@ -12,12 +11,6 @@ concurrency:
  cancel-in-progress: true
 {%- endmacro -%}

-{%- macro add_retry_to_env() -%}
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-{%- endmacro -%}
-
 {%- macro display_ec2_information() -%}
      - name: Display EC2 information
        shell: bash
@ -34,46 +27,32 @@ concurrency:
          echo "instance-type: $(get_ec2_metadata instance-type)"
 {%- endmacro -%}

-{%- macro parse_ref(pytorch_directory="") -%}
+{%- macro parse_ref() -%}
      - name: Parse ref
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
        id: parse-ref
        run: .github/scripts/parse_ref.py
 {%- endmacro -%}

-{%- macro upload_test_statistics(build_environment, when="always()", pytorch_directory="") -%}
+{%- macro upload_test_statistics(build_environment) -%}
      - name: Display and upload test statistics (Click Me)
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
-        if: !{{ when }}
+        if: always()
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
          AWS_DEFAULT_REGION: us-east-1
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
          JOB_BASE_NAME: !{{ build_environment }}-test
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: '${{ github.run_id }}'
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
        shell: bash
        run: |
          python3 -m pip install -r requirements.txt
-          python3 -m pip install boto3==1.19.12
+          python3 -m pip install boto3==1.16.34
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
 {%- endmacro -%}

-{%- macro chown_dir(dir) -%}
-      - name: Chown artifacts
-        if: always()
-        run: |
-          # Ensure the working directory gets chowned back to the current user
-          docker run --rm -v "!{{ dir }}:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-{%- endmacro -%}
-
 {%- macro setup_ec2_linux() -%}
      !{{ display_ec2_information() }}
      - name: Log in to ECR
@ -81,20 +60,23 @@ concurrency:
          AWS_RETRY_MODE: standard
          AWS_MAX_ATTEMPTS: 5
        run: |
-          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-          !{{ add_retry_to_env() }}
-          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
-              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
+          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
+          bash /tmp/ecr-login.sh
+          rm /tmp/ecr-login.sh
      - name: Chown workspace
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
        run: |
-          !{{ add_retry_to_env() }}
-          retry docker pull "${ALPINE_IMAGE}"
+          retry () {
+              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
          # Ensure the working directory gets chowned back to the current user
+          retry docker pull "${ALPINE_IMAGE}"
          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      - name: Clean workspace
        run: |
-          rm -rf "${GITHUB_WORKSPACE}"
-          mkdir "${GITHUB_WORKSPACE}"
+          rm -rf "${GITHUB_WORKSPACE:?}/*"
+          rm -f ~/.ssh/authorized_keys
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
@ -104,16 +86,15 @@ concurrency:
          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
 {%- endmacro -%}

-{%- macro teardown_ec2_linux(pytorch_directory="") -%}
+{%- macro teardown_ec2_linux() -%}
      - name: Hold runner for 2 hours or until ssh sessions have drained
-{%- if pytorch_directory %}
-        working-directory: !{{ pytorch_directory }}
-{%- endif %}
        # Always hold for active ssh sessions
        if: always()
        run: .github/scripts/wait_for_ssh_to_drain.sh
      - name: Chown workspace
        if: always()
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
@ -134,56 +115,14 @@ concurrency:
          # deep clone, to allow use of git merge-base
          fetch-depth: 0
          submodules: !{{ submodules }}
-      - name: Clean PyTorch checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-
 {%- endmacro -%}

-{%- macro upload_downloaded_files(name, artifact_name="", use_s3=True, when="always()") -%}
-      - name: Zip JSONs for upload
-        if: !{{ when }}
-        env:
-{%- if name == 'linux' or name == 'windows' or name == 'macos' %}
-          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
-{%- else %}
-          FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
-{%- endif %}
-{%- if name == 'windows' %}
-        shell: powershell
-        run: |
-          # -ir => recursive include all files in pattern
-          7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
-{%- else %}
-        run: |
-          # Remove any previous test jsons if they exist
-          rm -f test-jsons-*.zip
-          zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
-{%- endif %}
-{%- if use_s3 %}
-      - uses: !{{ upload_artifact_s3_action }}
-        name: Store Test Downloaded JSONs on S3
-{%- else %}
-      - uses: actions/upload-artifact@v2
-        name: Store Test Downloaded JSONs on Github
-{%- endif %}
-        if: !{{ when }}
-        with:
-{%- if artifact_name != "" %}
-          name: !{{ artifact_name }}
-{%- endif %}
-          retention-days: 14
-          if-no-files-found: warn
-          path:
-            test-jsons-*.zip
-{%- endmacro -%}

-{%- macro upload_test_reports(name, artifact_name="", use_s3=True) -%}
+{%- macro upload_test_reports(name) -%}
      - name: Zip test reports for upload
        if: always()
        env:
-{%- if name == 'linux' or name == 'windows' or name == 'macos' %}
+{%- if name == 'linux' or name == 'windows' %}
          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
 {%- else %}
          FILE_SUFFIX: '!{{ name }}-${{ github.job }}'
@ -199,22 +138,35 @@ concurrency:
          rm -f test-reports-*.zip
          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
 {%- endif %}
-{%- if use_s3 %}
-      - uses: !{{ upload_artifact_s3_action }}
-        name: Store Test Reports on S3
-{%- else %}
      - uses: actions/upload-artifact@v2
-        name: Store Test Reports on Github
-{%- endif %}
+        name: Store Test Reports
        if: always()
        with:
-{%- if artifact_name != "" %}
-          name: !{{ artifact_name }}
+{%- if name == 'linux' or name == 'windows' %}
+          name: test-reports-${{ matrix.config }}
+{%- else %}
+          name: test-reports-!{{ name }}
 {%- endif %}
          retention-days: 14
          if-no-files-found: error
          path:
+{%- if name == 'windows' %}
+            pytorch-${{ github.run_id }}/test-reports-*.zip
+{%- else %}
            test-reports-*.zip
+{%- endif %}
+      - uses: !{{ upload_artifact_s3_action }}
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+{%- if name == 'windows' %}
+            pytorch-${{ github.run_id }}/test-reports-*.zip
+{%- else %}
+            test-reports-*.zip
+{%- endif %}
 {%- endmacro -%}

 {%- macro render_test_results() -%}
@ -232,71 +184,3 @@ concurrency:
        run: |
          python3 tools/render_junit.py test/
 {%- endmacro -%}
-
-{%- macro calculate_docker_image(always_rebuild) -%}
-      - name: Calculate docker image tag
-        id: calculate-tag
-        run: |
-          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
-          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "::set-output name=docker_tag::${DOCKER_TAG}"
-          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
-      - name: Check if image should be built
-        id: check
-        env:
-          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
-        run: |
-          set -x
-{%- if not always_rebuild %}
-          # Check if image already exists, if it does then skip building it
-          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
-            exit 0
-          fi
-          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
-            # if we're on the base branch then use the parent commit
-            MERGE_BASE=$(git rev-parse HEAD~)
-          else
-            # otherwise we're on a PR, so use the most recent base commit
-            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
-          fi
-          # Covers the case where a previous tag doesn't exist for the tree
-          # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
-          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
-            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
-            exit 1
-          fi
-          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
-          # If no image exists but the hash is the same as the previous hash then we should error out here
-          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
-            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
-            echo "       contact the PyTorch team to restore the original images"
-            exit 1
-          fi
-{%- endif %}
-          echo ::set-output name=rebuild::yes
-      - name: Build and push docker image
-        if: ${{ steps.check.outputs.rebuild }}
-        env:
-          DOCKER_SKIP_S3_UPLOAD: 1
-        working-directory: .circleci/docker
-        run: |
-          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
-          ./build_docker.sh
-{%- endmacro -%}
-
-{%- macro setup_miniconda(python_version) -%}
-      - name: Setup miniconda
-        uses: conda-incubator/setup-miniconda@v2
-        with:
-          auto-update-conda: true
-          python-version: !{{ python_version }}
-          activate-environment: build
-{%- endmacro -%}
-
-{%- macro set_xcode_version(xcode_version) -%}
-{%- if xcode_version != '' %}
-  # Set xcode xcode version to !{{ xcode_version }}
-  DEVELOPER_DIR: /Applications/Xcode_!{{ xcode_version }}.app/Contents/Developer
-{%- endif %}
-{%- endmacro -%}
--- a/.github/templates/common_android.yml.j2
+++ b/.github/templates/common_android.yml.j2
@ -1,81 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- macro upload_androind_binary_size(build_type, artifacts) -%}
-      - name: Display and upload binary build size statistics (Click Me)
-        # temporary hack: set CIRCLE_* vars, until we update
-        # tools/stats/print_test_stats.py to natively support GitHub Actions
-        env:
-          AWS_DEFAULT_REGION: us-east-1
-          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: '${{ github.run_id }}'
-        run: |
-          # The artifact file is created inside docker container, which contains the result binaries.
-          # Now unpackage it into the project folder. The subsequent script will scan project folder
-          # to locate result binaries and report their sizes.
-          # If artifact file is not provided it assumes that the project folder has been mounted in
-          # the docker during build and already contains the result binaries, so this step can be skipped.
-          export ARTIFACTS=!{{ artifacts }}
-          if [ -n "${ARTIFACTS}" ]; then
-            tar xf "${ARTIFACTS}" -C "${GITHUB_WORKSPACE}"
-            cd "${GITHUB_WORKSPACE}"
-          fi
-          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
-          export COMMIT_TIME
-          ANDROID_BUILD_TYPE=!{{ build_type}}
-          export ANDROID_BUILD_TYPE
-          pip3 install requests==2.26 boto3==1.16.34
-          python3 -m tools.stats.upload_binary_size_to_scuba "android" || exit 0
-{%- endmacro -%}
-
-{%- macro build_android(env_name, container_suffix) -%}
-      - name: Build-!{{ container_suffix }}
-        env:
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        run: |
-          # detached container should get cleaned up by teardown_ec2_linux
-          #!/bin/bash -eo pipefail
-          # Pull Docker image and run build
-          time docker pull "${DOCKER_IMAGE}" >/dev/null
-          echo "${DOCKER_IMAGE}"
-          export container_name
-          container_name=$(docker run \
-            -e BUILD_ENVIRONMENT=!{{ env_name }} \
-            -e JOB_BASE_NAME \
-            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
-            -e IS_GHA \
-            -e PR_NUMBER \
-            -e SHA1 \
-            -e BRANCH \
-            -e GITHUB_RUN_ID \
-            -e SCCACHE_BUCKET \
-            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-            -e SKIP_SCCACHE_INITIALIZATION=1 \
-            -e TORCH_CUDA_ARCH_LIST \
-            -e PR_LABELS \
-            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --security-opt seccomp=unconfined \
-            --cap-add=SYS_PTRACE \
-            --tty \
-            --detach \
-            --user jenkins \
-            -w /var/lib/jenkins/workspace \
-            "${DOCKER_IMAGE}"
-          )
-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
-          docker cp "${GITHUB_WORKSPACE}/." "${container_name}:/var/lib/jenkins/workspace"
-          # shellcheck disable=SC1105
-          ((echo "sudo chown -R jenkins . && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "${container_name}" bash) 2>&1
-
-          # Copy dist folder back
-          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-!{{ container_suffix }}
-          docker cp "${container_name}:/var/lib/jenkins/workspace/dist" "${GITHUB_WORKSPACE}/." || echo "Dist folder not found"
-          docker commit "${container_name}" "${COMMIT_DOCKER_IMAGE}"
-          time docker push "${COMMIT_DOCKER_IMAGE}"
-{%- endmacro -%}
--- a/.github/templates/docker_builds_ci_workflow.yml.j2
+++ b/.github/templates/docker_builds_ci_workflow.yml.j2
@ -1,60 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/docker_builds_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-  workflow_dispatch:
-  pull_request:
-    types: [opened, synchronize, reopened]
-    paths:
-      - '.circleci/docker/**'
-      - '.github/workflows/generated-docker-builds.yml'
-{%- if is_scheduled %}
-  schedule:
-    - cron: !{{ is_scheduled }}
-{%- endif %}
-!{{ common.concurrency(build_environment) }}
-
-env:
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  AWS_DEFAULT_REGION: us-east-1
-
-jobs:
-{% block docker_build +%}
-  docker-build:
-    runs-on: linux.2xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
-    strategy:
-      matrix:
-        include:
-          {%- for docker_image in docker_images %}
-            - docker_image_base: '!{{ docker_image }}'
-              docker_image_short_name: '!{{ docker_image.split('/')[-1] }}'
-          {%- endfor %}
-    env:
-      DOCKER_IMAGE_BASE: '${{ matrix.docker_image_base }}'
-    name: docker-build (${{ matrix.docker_image_short_name }})
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      !{{ common.checkout_pytorch("recursive") }}
-      !{{ common.calculate_docker_image(true) }}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      !{{ common.parse_ref() }}
-      !{{ common.teardown_ec2_linux() }}
-      - name: Hold runner for 2 hours or until ssh sessions have drained
-        # Always hold for active ssh sessions
-        if: always()
-        run: .github/scripts/wait_for_ssh_to_drain.sh
-      - name: Clean up docker images
-        if: always()
-        run: |
-          # Prune all of the docker images
-          docker system prune -af
-{%- endblock %}
--- a/.github/templates/ios_ci_workflow.yml.j2
+++ b/.github/templates/ios_ci_workflow.yml.j2
@ -1,86 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/ios_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
-
-{%- if is_scheduled %}
-  schedule:
-    - cron: !{{ is_scheduled }}
-{%- else %}
-  push:
-    branches:
-      - master
-      - release/*
-{%- endif %}
-  workflow_dispatch:
-
-# For setup-miniconda, see https://github.com/conda-incubator/setup-miniconda/issues/179
-defaults:
-  run:
-    shell: bash -x -e -l {0}
-env:
-  BUILD_ENVIRONMENT: !{{ build_environment }}
-  IN_CI: 1
-  IS_GHA: 1
-!{{ common.set_xcode_version(xcode_version) }}
-
-jobs:
-{% block build +%}
-  build:
-    runs-on: macos-10.15
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build
-      IOS_CERT_KEY_2022: ${{ secrets.IOS_CERT_KEY_2022 }}
-      IOS_SIGN_KEY_2022: ${{ secrets.IOS_SIGN_KEY_2022 }}
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == '!{{ ciflow_config.trigger_action }}') && (github.event.assigneed.login == '!{{ ciflow_config.trigger_actor }}') }}
-      LABEL_CONDITIONS: ${{ !{{ ciflow_config.label_conditions }} }}
-      PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-    if: !{{ ciflow_config.root_job_condition }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.checkout_pytorch("recursive") }}
-      !{{ common.setup_miniconda("3.8") }}
-      - name: Install ios / conda Dependencies
-        run: |
-          # Install dependencies
-          brew install libtool
-          conda install numpy ninja pyyaml mkl mkl-include setuptools cmake cffi requests typing_extensions --yes
-      - name: Run Fastlane
-        shell: bash -e {0}
-        run: |
-          set -x
-          cd ios/TestApp
-          # install fastlane
-          sudo gem install bundler && bundle install
-          # install certificates
-          echo "${IOS_CERT_KEY_2022}" >> cert.txt
-          base64 --decode cert.txt -o Certificates.p12
-          rm cert.txt
-          bundle exec fastlane install_root_cert
-          bundle exec fastlane install_dev_cert
-          # install the provisioning profile
-          PROFILE=PyTorch_CI_2022.mobileprovision
-          PROVISIONING_PROFILES=~/Library/MobileDevice/Provisioning\ Profiles
-          mkdir -pv "${PROVISIONING_PROFILES}"
-          cd "${PROVISIONING_PROFILES}"
-          echo "${IOS_SIGN_KEY_2022}" >> cert.txt
-          base64 --decode cert.txt -o ${PROFILE}
-          rm cert.txt
-      - name: Build
-        run: |
-          export TCLLIBPATH="/usr/local/lib"
-          python -VV
-          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}
-          scripts/build_ios.sh
-{% endblock +%}
-
-!{{ common.concurrency(build_environment) }}
--- a/.github/templates/linux_binary_build_workflow.yml.j2
+++ b/.github/templates/linux_binary_build_workflow.yml.j2
@ -1,255 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/linux_binary_build_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-{%- macro binary_env(config) -%}
-    env:
-      PACKAGE_TYPE: !{{ config["package_type"] }}
-      # TODO: This is a legacy variable that we eventually want to get rid of in
-      #       favor of GPU_ARCH_VERSION
-      DESIRED_CUDA: !{{ config["desired_cuda"] }}
-{%- if config["gpu_arch_version"] %}
-      GPU_ARCH_VERSION: !{{ config["gpu_arch_version"] }}
-{%- endif %}
-      GPU_ARCH_TYPE: !{{ config["gpu_arch_type"] }}
-      DOCKER_IMAGE: !{{ config["container_image"] }}
-      SKIP_ALL_TESTS: 1
-{%- if config["package_type"] == "libtorch" %}
-      LIBTORCH_VARIANT: !{{ config["libtorch_variant"] }}
-      DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
-{%- else %}
-      DESIRED_PYTHON: !{{ config["python_version"] }}
-{%- endif %}
-{%- endmacro %}
-
-on:
-# NOTE: Uncomment this to test within your PR
-# TODO: Migrate to new ciflow trigger, reference https://github.com/pytorch/pytorch/pull/70321
-#  pull_request:
-#    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
-  push:
-    tags:
-      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
-      - nightly-binary
-  workflow_dispatch:
-
-env:
-  # Needed for conda builds
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  ANACONDA_USER: pytorch
-  AWS_DEFAULT_REGION: us-east-1
-  BINARY_ENV_FILE: /tmp/env
-  BUILD_ENVIRONMENT: !{{ build_environment }}
-  BUILDER_ROOT: /builder
-  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  IN_CI: 1
-  IS_GHA: 1
-  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  PYTORCH_FINAL_PACKAGE_DIR: /artifacts
-  PYTORCH_RETRY_TEST_CASES: 1
-  PYTORCH_ROOT: /pytorch
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  SKIP_ALL_TESTS: 1
-!{{ common.concurrency(build_environment) }}
-
-jobs:
-  should-run:
-    if: !{{ ciflow_config.root_job_condition }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: noop
-        run: echo "This job is here so we don't have a bunch of skipped binary builds :D"
-{%- for config in build_configs %}
-  !{{ config["build_name"] }}-build:
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs: should-run
-    runs-on: linux.4xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: pytorch
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: builder
-{%- if config["gpu_arch_type"] == 'cuda' and config["gpu_arch_version"].startswith('11') %}
-      - name: Set BUILD_SPLIT_CUDA
-        run: |
-          echo "BUILD_SPLIT_CUDA='ON'" >> "$GITHUB_ENV"
-{%- endif %}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Build PyTorch binary
-        run: |
-          set -x
-          mkdir -p artifacts/
-          container_name=$(docker run \
-            -e BINARY_ENV_FILE \
-            -e BUILDER_ROOT \
-            -e BUILD_ENVIRONMENT \
-            -e BUILD_SPLIT_CUDA \
-            -e DESIRED_CUDA \
-            -e DESIRED_DEVTOOLSET \
-            -e DESIRED_PYTHON \
-            -e GPU_ARCH_TYPE \
-            -e GPU_ARCH_VERSION \
-            -e IS_GHA \
-            -e LIBTORCH_VARIANT \
-            -e PACKAGE_TYPE \
-            -e PYTORCH_FINAL_PACKAGE_DIR \
-            -e PYTORCH_ROOT \
-            -e SKIP_ALL_TESTS \
-            --tty \
-            --detach \
-            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
-            -v "${GITHUB_WORKSPACE}/builder:/builder" \
-            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
-            -w / \
-            "${DOCKER_IMAGE}"
-          )
-          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
-          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/!{{ config["package_type"] }}/build.sh"
-      !{{ common.chown_dir("${RUNNER_TEMP}/artifacts") }}
-      - uses: !{{ common.upload_artifact_s3_action }}
-        with:
-          name: !{{ config["build_name"] }}
-          retention-days: 14
-          if-no-files-found: error
-          path:
-            ${{ runner.temp }}/artifacts/*
-      !{{ common.teardown_ec2_linux("pytorch/") }}
-  !{{ config["build_name"] }}-test:  # Testing
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs: !{{ config["build_name"] }}-build
-{%- if config["gpu_arch_type"] == "cuda" %}
-    runs-on: linux.4xlarge.nvidia.gpu
-{%- else %}
-    runs-on: linux.4xlarge
-{%- endif %}
-    timeout-minutes: !{{ common.timeout_minutes }}
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
-        name: Download Build Artifacts
-        with:
-          name: !{{ config["build_name"] }}
-          path: "${{ runner.temp }}/artifacts/"
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-        with:
-          path: pytorch
-          submodules: recursive
-      - name: Clone pytorch/builder
-        uses: actions/checkout@v2
-        with:
-          repository: pytorch/builder
-          path: builder
-{%- if config["gpu_arch_type"] == "cuda" %}
-      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
-        working-directory: pytorch/
-        run: |
-          bash .github/scripts/install_nvidia_utils_linux.sh
-          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
-{%- endif %}
-      - name: Pull Docker image
-        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Test PyTorch binary
-        run: |
-          set -x
-          # shellcheck disable=SC2086,SC2090
-          container_name=$(docker run \
-            ${GPU_FLAG:-} \
-            -e BINARY_ENV_FILE \
-            -e BUILDER_ROOT \
-            -e BUILD_ENVIRONMENT \
-            -e BUILD_SPLIT_CUDA \
-            -e DESIRED_CUDA \
-            -e DESIRED_DEVTOOLSET \
-            -e DESIRED_PYTHON \
-            -e GPU_ARCH_TYPE \
-            -e GPU_ARCH_VERSION \
-            -e IS_GHA \
-            -e LIBTORCH_VARIANT \
-            -e PACKAGE_TYPE \
-            -e PYTORCH_FINAL_PACKAGE_DIR \
-            -e PYTORCH_ROOT \
-            -e SKIP_ALL_TESTS \
-            --tty \
-            --detach \
-            -v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
-            -v "${GITHUB_WORKSPACE}/builder:/builder" \
-            -v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
-            -w / \
-            "${DOCKER_IMAGE}"
-          )
-          docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
-          # Generate test script
-          docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
-          docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
-      !{{ common.teardown_ec2_linux("pytorch/") }}
-  !{{ config["build_name"] }}-upload:  # Uploading
-    runs-on: linux.2xlarge  # self hosted runner to download ec2 artifacts
-    if: ${{ github.repository_owner == 'pytorch' }}
-    needs: !{{ config["build_name"] }}-test
-    !{{ binary_env(config) }}
-    steps:
-      !{{ common.setup_ec2_linux() }}
-      - name: Clone pytorch/pytorch
-        uses: actions/checkout@v2
-      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
-        name: Download Build Artifacts
-        with:
-          name: !{{ config["build_name"] }}
-          path: "${{ runner.temp }}/artifacts/"
-      - name: Set DRY_RUN (only for tagged pushes)
-        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
-        run: |
-          echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
-      - name: Set UPLOAD_CHANNEL (only for tagged pushes)
-        if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/')}}
-        run: |
-          # reference ends with an RC suffix
-          if [[ ${GITHUB_REF_NAME} = *-rc[0-9]* ]]; then
-            echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
-          fi
-      - name: Upload binaries
-        env:
-          PKG_DIR: "${{ runner.temp }}/artifacts"
-          UPLOAD_SUBFOLDER: "${{ env.DESIRED_CUDA }}"
-          # When running these on pull_request events these should be blank
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_SECRET_KEY }}
-          ANACONDA_API_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
-        run: |
-          docker run --rm -i \
-            -e ANACONDA_API_TOKEN \
-            -e AWS_ACCESS_KEY_ID \
-            -e AWS_SECRET_ACCESS_KEY \
-            -e DRY_RUN \
-            -e PACKAGE_TYPE \
-            -e PKG_DIR=/artifacts \
-            -e UPLOAD_CHANNEL \
-            -e UPLOAD_SUBFOLDER \
-            -v "${RUNNER_TEMP}/artifacts:/artifacts" \
-            -v "${GITHUB_WORKSPACE}:/v" \
-            -w /v \
-            308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/miniconda3:4.10.3 \
-            bash -c '.circleci/scripts/binary_upload.sh'
-      !{{ common.teardown_ec2_linux() }}
-{%- endfor %}
--- a/.github/templates/linux_ci_workflow.yml.j2
+++ b/.github/templates/linux_ci_workflow.yml.j2
@ -7,8 +7,18 @@ name: !{{ build_environment }}
 {%- endblock %}

 on:
+{%- if on_pull_request %}
  pull_request:
+  {%- if ciflow_config.enabled %}
+    {%- if ciflow_config.trigger_action_only %}
+    types: [!{{ ciflow_config.trigger_action }}]
+    {%- else %}
    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
+    {%- endif %}
+  {%- endif %}
+{%- else %}
+  # TODO: Enable pull_request builds when we can verify capacity can be met by auto-scalers
+{%- endif %}

 {%- if is_scheduled %}
  schedule:
@ -28,7 +38,6 @@ env:
  XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
  TORCH_CUDA_ARCH_LIST: 5.2
  IN_CI: 1
-  IS_GHA: 1
  # This is used for the phase of adding wheel tests only, will be removed once completed
  IN_WHEEL_TEST: 1
  # Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
@ -36,53 +45,101 @@ env:
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  AWS_DEFAULT_REGION: us-east-1
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  PYTORCH_RETRY_TEST_CASES: 1
-{%- if build_with_debug %}
-  DEBUG: 1
-{%- endif %}
+
 !{{ common.concurrency(build_environment) }}

 jobs:
-{% block build +%}
-  build:
-    runs-on: linux.2xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
-    if: !{{ ciflow_config.root_job_condition }}
+{%- if ciflow_config.enabled %}
+  !{{ ciflow_config.root_job_name }}:
+    runs-on: ubuntu-18.04
+    if: ${{ !{{ ciflow_config.root_job_condition }} }}
    env:
-      JOB_BASE_NAME: !{{ build_environment }}-build
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == '!{{ ciflow_config.trigger_action }}') && (github.event.assigneed.login == '!{{ ciflow_config.trigger_actor }}') }}
-      LABEL_CONDITIONS: ${{ !{{ ciflow_config.label_conditions }} }}
+      LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
+    steps:
+      - name: noop
+        run: echo running !{{ ciflow_config.root_job_name }}
+      - name: print labels
+        run: echo "${LABELS}"
+{%- endif %}
+  calculate-docker-image:
+    runs-on: linux.2xlarge
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
+    env:
+      DOCKER_BUILDKIT: 1
+    timeout-minutes: 90
    outputs:
      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
+      !{{ common.setup_ec2_linux() }}
+      !{{ common.checkout_pytorch("false") }}
+      - name: Calculate docker image tag
+        id: calculate-tag
+        run: |
+          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
+          echo "::set-output name=docker_tag::${DOCKER_TAG}"
+          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
+      - name: Check if image should be built
+        id: check
+        env:
+          DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
+          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
+        run: |
+          set -x
+          # Check if image already exists, if it does then skip building it
+          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
+            exit 0
+          fi
+          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
+            # if we're on the base branch then use the parent commit
+            MERGE_BASE=$(git rev-parse HEAD~)
+          else
+            # otherwise we're on a PR, so use the most recent base commit
+            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
+          fi
+          # Covers the case where a previous tag doesn't exist for the tree
+          # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
+          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
+            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
+            exit 1
+          fi
+          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
+          # If no image exists but the hash is the same as the previous hash then we should error out here
+          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
+            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
+            echo "       contact the PyTorch team to restore the original images"
+            exit 1
+          fi
+          echo ::set-output name=rebuild::yes
+      - name: Build and push docker image
+        if: ${{ steps.check.outputs.rebuild }}
+        env:
+          DOCKER_TAG: ${{ steps.calculate-tag.outputs.docker_tag }}
+          DOCKER_SKIP_S3_UPLOAD: 1
+        run: |
+          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
+          cd .circleci/docker && ./build_docker.sh
+{% block build +%}
+  build:
+    runs-on: linux.2xlarge
+    needs: [calculate-docker-image, !{{ ciflow_config.root_job_name }}]
+    env:
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
+      JOB_BASE_NAME: !{{ build_environment }}-build
+    steps:
      !{{ common.setup_ec2_linux() }}
      !{{ common.checkout_pytorch("recursive") }}
-      !{{ common.calculate_docker_image(false) }}
-      - name: Pull Docker image
+      - name: Pull docker image
        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
-      !{{ common.parse_ref() }}
+          docker pull "${DOCKER_IMAGE}"
      - name: Build
-        env:
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        run: |
          # detached container should get cleaned up by teardown_ec2_linux
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e JOB_BASE_NAME \
            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
-            -e IS_GHA \
-            -e PR_NUMBER \
-            -e SHA1 \
-            -e BRANCH \
-            -e GITHUB_RUN_ID \
            -e SCCACHE_BUCKET \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
@ -101,14 +158,19 @@ jobs:
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
+      !{{ common.parse_ref() }}
      - name: Display and upload binary build size statistics (Click Me)
        # temporary hack: set CIRCLE_* vars, until we update
        # tools/stats/print_test_stats.py to natively support GitHub Actions
        env:
+          AWS_DEFAULT_REGION: us-east-1
+          IS_GHA: 1
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: '${{ github.run_id }}'
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
        run: |
          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
          export COMMIT_TIME
@ -118,7 +180,7 @@ jobs:
        run: |
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      {%- if build_generates_artifacts %}
+      {%- if not is_libtorch %}
      - name: Archive artifacts into zip
        run: |
          zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
@ -145,14 +207,14 @@ jobs:
 {%- if not exclude_test %}
 {% block test +%}
  generate-test-matrix:
-    needs: build
    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
    env:
      TEST_RUNNER_TYPE: !{{ test_runner_type }}
      ENABLE_DISTRIBUTED_TEST: !{{ enable_distributed_test }}
      ENABLE_JIT_LEGACY_TEST: !{{ enable_jit_legacy_test }}
-      ENABLE_FX2TRT_TEST: !{{ enable_fx2trt_test }}
      ENABLE_MULTIGPU_TEST: !{{ enable_multigpu_test }}
      ENABLE_NOGPU_NO_AVX_TEST: !{{ enable_nogpu_no_avx_test }}
      ENABLE_NOGPU_NO_AVX2_TEST: !{{ enable_nogpu_no_avx2_test }}
@ -163,7 +225,6 @@ jobs:
      ENABLE_NOARCH_TEST: !{{ enable_noarch_test }}
      NUM_TEST_SHARDS: !{{ num_test_shards }}
      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
-      DISTRIBUTED_GPU_RUNNER_TYPE: linux.8xlarge.nvidia.gpu
      NOGPU_RUNNER_TYPE: linux.2xlarge
      PR_BODY: ${{ github.event.pull_request.body }}
    outputs:
@ -182,26 +243,25 @@ jobs:
        run: .github/scripts/generate_pytorch_test_matrix.py

  test:
-    needs: [build, generate-test-matrix]
+    needs: [calculate-docker-image, build, generate-test-matrix, !{{ ciflow_config.root_job_name }}]
    strategy:
      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
-    timeout-minutes: !{{ common.timeout_minutes }}
    env:
-      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
      JOB_BASE_NAME: !{{ build_environment }}-test
      TEST_CONFIG: ${{ matrix.config }}
      SHARD_NUMBER: ${{ matrix.shard }}
      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
      PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
+      CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
    steps:
      !{{ common.setup_ec2_linux() }}
      !{{ common.checkout_pytorch("recursive") }}
-      - name: Pull Docker image
+      - name: Pull docker image
        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
+          docker pull "${DOCKER_IMAGE}"
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
        run: |
@ -233,31 +293,24 @@ jobs:
      - name: Test
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        # Time out the test phase after !{{ timeout_after }} minutes
-        timeout-minutes: !{{ timeout_after }}
+          IS_GHA: 1
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          AWS_DEFAULT_REGION: us-east-1
        run: |
-          set -x
-
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
-          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
-            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
-          PROXY_ENV=
-          # NOTE: XLA multiprocessing tests appear to have issues with squid proxy, going to disable for now
-          #       We should investigate whether or not there's a list of hostnames we can add to no_proxy to
-          #       make it so that we shouldn't have to fully disable squid for XLA tests
-          if [[ $TEST_CONFIG != 'xla' ]]; then
-            # shellcheck disable=SC2089
-            PROXY_ENV="-e http_proxy=!{{ common.squid_proxy }} -e https_proxy=!{{ common.squid_proxy }} -e no_proxy=!{{ common.squid_no_proxy }}"
+          if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
+            export SHARD_NUMBER=0
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Used for GPU_FLAG since that doesn't play nice
-          # shellcheck disable=SC2086,SC2090
+          # shellcheck disable=SC2086
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e BUILD_ENVIRONMENT \
@ -266,8 +319,9 @@ jobs:
            -e GITHUB_ACTIONS \
            -e IN_CI \
            -e IS_GHA \
-            -e BRANCH \
-            -e SHA1 \
+            -e CIRCLE_BRANCH \
+            -e CIRCLE_SHA1 \
+            -e CIRCLE_PR_NUMBER \
            -e AWS_DEFAULT_REGION \
            -e IN_WHEEL_TEST \
            -e SHARD_NUMBER \
@ -275,17 +329,15 @@ jobs:
            -e TEST_CONFIG \
            -e NUM_TEST_SHARDS \
            -e PYTORCH_IGNORE_DISABLED_ISSUES \
-            -e PYTORCH_RETRY_TEST_CASES \
            -e PR_LABELS \
+            -e CONTINUE_THROUGH_ERROR \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
+            -e http_proxy="!{{ common.squid_proxy }}" -e https_proxy="!{{ common.squid_proxy }}" -e no_proxy="!{{ common.squid_no_proxy }}" \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-            ${PROXY_ENV} \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --ulimit stack=10485760:83886080 \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
-            --ipc=host \
            --shm-size="${SHM_SIZE}" \
            --tty \
            --detach \
@ -302,7 +354,12 @@ jobs:
          # Ensure the working directory gets chowned back to the current user
          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
      !{{ common.render_test_results() }}
-      !{{ common.upload_downloaded_files(name='linux') }}
+      {%- if is_coverage %}
+      - name: Report coverage
+        run: |
+          python3 -mpip install codecov==2.1.12
+          python3 -mcodecov
+      {%- endif %}
      !{{ common.upload_test_reports(name='linux') }}
      !{{ common.upload_test_statistics(build_environment) }}
      !{{ common.teardown_ec2_linux() }}
@ -311,22 +368,19 @@ jobs:
 {%- if enable_doc_jobs %}
  build-docs:
    runs-on: linux.2xlarge
-    timeout-minutes: !{{ common.timeout_minutes }}
    strategy:
      matrix:
        docs_type: [cpp, python]
-    needs: [build]
+    needs: [calculate-docker-image, build, !{{ ciflow_config.root_job_name }}]
    env:
-      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      DOCKER_IMAGE: ${{ needs.calculate-docker-image.outputs.docker_image }}
      DOCS_TYPE: ${{ matrix.docs_type }}
-      WITH_PUSH: ${{ github.event_name == 'schedule' }}
    steps:
      !{{ common.setup_ec2_linux() }}
      !{{ common.checkout_pytorch("recursive") }}
-      - name: Pull Docker image
+      - name: Pull docker image
        run: |
-          !{{ common.add_retry_to_env() }}
-          retry docker pull "${DOCKER_IMAGE}"
+          docker pull "${DOCKER_IMAGE}"
      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
        name: Download PyTorch Build Artifacts
        with:
@ -334,44 +388,29 @@ jobs:
      - name: Unzip artifacts
        run: |
          unzip -o artifacts.zip
-{%- if is_scheduled %}
-      - name: Generate netrc (only for docs-push)
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          GITHUB_PYTORCHBOT_TOKEN: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
-        run: |
-          # set credentials for https pushing
-          echo "machine github.com" > "${RUNNER_TEMP}/.netrc"
-          echo "login pytorchbot" >> "${RUNNER_TEMP}/.netrc"
-          echo "password ${GITHUB_PYTORCHBOT_TOKEN}" >> "${RUNNER_TEMP}/.netrc"
-{%- endif %}
      - name: Build ${{ matrix.docs_type }} docs
        run: |
          set -ex
          time docker pull "${DOCKER_IMAGE}" > /dev/null
          echo "${GITHUB_REF}"
-          # TODO: Set it correctly when workflows are scheduled on tags
-          target="master"
+          ref=${GITHUB_REF##*/}  # last path component of the ref, e.g. refs/tags/v1.10.0 -> v1.10.0
+          target=${ref#v}  # strip only a leading "v"; ${ref//v} would delete every "v" (e.g. "dev" -> "de")
          # detached container should get cleaned up by teardown_ec2_linux
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
            -e IN_CI \
            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e SHA1="$GITHUB_SHA" \
+            -e CIRCLE_SHA1="$GITHUB_SHA" \
            -e DOCS_VERSION="${target}" \
            -e DOCS_TYPE \
            -e PR_LABELS \
-            -e WITH_PUSH \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
            --user jenkins \
-{%- if is_scheduled %}
-            -v "${RUNNER_TEMP}/.netrc":/var/lib/jenkins/.netrc \
-{%- endif %}
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
@ -388,7 +427,7 @@ jobs:
          retention-days: 14
          s3-bucket: doc-previews
          if-no-files-found: error
-          path: pytorch.github.io/docs/master/
+          path: pytorch.github.io/docs/merge/
          s3-prefix: pytorch/${{ github.event.pull_request.number }}
      - uses: !{{ common.upload_artifact_s3_action }}
        name: Upload C++ Docs Preview
@ -399,4 +438,14 @@ jobs:
          s3-bucket: doc-previews
          path: cppdocs/
          s3-prefix: pytorch/${{ github.event.pull_request.number }}/cppdocs
+      - name: Archive artifacts into zip
+        run: |
+          zip -r "docs_${DOCS_TYPE}.zip" "${GITHUB_WORKSPACE}/pytorch.github.io" "${GITHUB_WORKSPACE}/cppdocs"
+      - uses: actions/upload-artifact@v2
+        name: Store PyTorch Build Artifacts
+        with:
+          name: docs_${{ matrix.docs_type }}
+          path: docs_${{ matrix.docs_type }}.zip
+          if-no-files-found: error
+      !{{ common.teardown_ec2_linux() }}
 {%- endif -%}
--- a/.github/templates/macos_ci_workflow.yml.j2
+++ b/.github/templates/macos_ci_workflow.yml.j2
@ -1,148 +0,0 @@
-{% import 'common.yml.j2' as common %}
-
-{%- block name -%}
-# Template is at:    .github/templates/macos_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: !{{ build_environment }}
-{%- endblock %}
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
-
-{%- if is_scheduled %}
-  schedule:
-    - cron: !{{ is_scheduled }}
-{%- else %}
-  push:
-    branches:
-      - master
-      - release/*
-{%- endif %}
-  workflow_dispatch:
-
-# For setup-miniconda, see https://github.com/conda-incubator/setup-miniconda/issues/179
-defaults:
-  run:
-    shell: bash -e -l {0}
-env:
-  BUILD_ENVIRONMENT: !{{ build_environment }}
-  COMPACT_JOB_NAME: !{{ build_environment }}
-  IN_CI: 1
-  IS_GHA: 1
-  PYTORCH_RETRY_TEST_CASES: 1
-!{{ common.set_xcode_version(xcode_version) }}
-
-jobs:
-{% block build +%}
-  build:
-    runs-on: !{{ test_runner_type }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}
-      # For sccache access (only on non-forked PRs)
-      AWS_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == '!{{ ciflow_config.trigger_action }}') && (github.event.assigneed.login == '!{{ ciflow_config.trigger_actor }}') }}
-      LABEL_CONDITIONS: ${{ !{{ ciflow_config.label_conditions }} }}
-      PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-    if: !{{ ciflow_config.root_job_condition }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      !{{ common.checkout_pytorch("recursive") }}
-      !{{ common.setup_miniconda("3.8") }}
-      - name: Install macOS homebrew dependencies
-        run: |
-          # Install dependencies
-          brew install libomp
-      - name: Install sccache (only for non-forked PRs, and pushes to trunk)
-        if: ${{ github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository }}
-        run: |
-          sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-          sudo chmod +x /usr/local/bin/sccache
-          echo "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> "${GITHUB_ENV}"
-      - name: Build
-        run: |
-          echo "CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}" >> "${GITHUB_ENV}"
-          .jenkins/pytorch/macos-build.sh
-{%- if build_generates_artifacts %}
-      - name: Archive artifacts into zip
-        run: |
-          zip -1 -r artifacts.zip dist/
-      - uses: actions/upload-artifact@v2
-        name: Store PyTorch Build Artifacts on GHA
-        with:
-          name: ${{ env.BUILD_ENVIRONMENT }}
-          retention-days: 14
-          if-no-files-found: error
-          path:
-            artifacts.zip
-{%- endif %}
-{% endblock +%}
-{%- if not exclude_test %}
-{% block test +%}
-  generate-test-matrix:
-    needs: build
-    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      TEST_RUNNER_TYPE: !{{ test_runner_type }}
-      ENABLE_DISTRIBUTED_TEST: !{{ enable_distributed_test }}
-      NUM_TEST_SHARDS: !{{ num_test_shards }}
-      PR_BODY: ${{ github.event.pull_request.body }}
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
-      ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
-    container:
-      image: python:3.9
-    steps:
-      - name: Install dependencies
-        run: pip install typing-extensions==3.10
-      - name: Clone pytorch/pytorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-      - name: Generating test matrix
-        id: set-matrix
-        run: .github/scripts/generate_pytorch_test_matrix.py
-
-  test:
-    needs: [build, generate-test-matrix]
-    strategy:
-      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
-      fail-fast: false
-    runs-on: ${{ matrix.runner }}
-    timeout-minutes: !{{ common.timeout_minutes }}
-    env:
-      JOB_BASE_NAME: !{{ build_environment }}-test
-      TEST_CONFIG: ${{ matrix.config }}
-      SHARD_NUMBER: ${{ matrix.shard }}
-      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
-      PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
-    steps:
-      !{{ common.checkout_pytorch("false") }}
-      - uses: actions/download-artifact@v2
-        name: Download PyTorch Build Artifacts from GHA
-        with:
-          name: ${{ env.BUILD_ENVIRONMENT }}
-          path: .
-      - name: Unzip artifacts
-        run: |
-          unzip -o artifacts.zip
-      !{{ common.setup_miniconda("3.8") }}
-      - name: Install macOS homebrew dependencies
-        run: |
-          # Install dependencies
-          brew install libomp
-      !{{ common.parse_ref() }}
-      - name: Test
-        run: |
-          python3 -mpip install dist/*.whl
-          .jenkins/pytorch/macos-test.sh
-      !{{ common.render_test_results() }}
-      !{{ common.upload_downloaded_files(name='macos', artifact_name="test-jsons", use_s3=False) }}
-      !{{ common.upload_test_reports("macos", artifact_name="test-reports", use_s3=False) }}
-      !{{ common.upload_test_statistics(build_environment) }}
-{% endblock +%}
-{%- endif %}
-
-!{{ common.concurrency(build_environment) }}
--- a/.github/templates/windows_ci_workflow.yml.j2
+++ b/.github/templates/windows_ci_workflow.yml.j2
@ -19,8 +19,16 @@
 name: !{{ build_environment }}

 on:
+{%- if on_pull_request %}
  pull_request:
+  {%- if ciflow_config.enabled %}
+    {%- if ciflow_config.trigger_action_only %}
+    types: [!{{ ciflow_config.trigger_action }}]
+    {%- else %}
    types: [opened, synchronize, reopened, !{{ ciflow_config.trigger_action }}]
+    {%- endif %}
+  {%- endif %}
+{%- endif %}
 {%- if is_scheduled %}
  schedule:
    - cron: !{{ is_scheduled }}
@ -35,13 +43,10 @@ on:
 env:
  BUILD_ENVIRONMENT: !{{ build_environment }}
  BUILD_WHEEL: 1
-  MAX_JOBS: 8
  CUDA_VERSION: "!{{ cuda_version }}"
  IN_CI: 1
-  IS_GHA: 1
  INSTALL_WINDOWS_SDK: 1
  PYTHON_VERSION: "3.8"
-  PYTORCH_RETRY_TEST_CASES: 1
  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
  SCCACHE_BUCKET: "ossci-compiler-cache"
  VC_PRODUCT: "BuildTools"
@ -50,38 +55,46 @@ env:
  VC_YEAR: "2019"
  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
  no_proxy: !{{ common.squid_no_proxy }}
-  AWS_DEFAULT_REGION: us-east-1
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-{%- if build_with_debug %}
-  DEBUG: 1
-{%- endif %}
 {%- if cuda_version != "cpu" %}
  TORCH_CUDA_ARCH_LIST: "7.0"
+  USE_CUDA: 1
 {%- endif %}
-  USE_CUDA: !{{ 1 if cuda_version != "cpu" else 0 }}

 !{{ common.concurrency(build_environment) }}

 jobs:
+{%- if ciflow_config.enabled %}
+  !{{ ciflow_config.root_job_name }}:
+    runs-on: ubuntu-18.04
+    if: ${{ !{{ ciflow_config.root_job_condition }} }}
+    steps:
+      - name: noop
+        run: echo running !{{ ciflow_config.root_job_name }}
+{%- endif %}
  build:
    runs-on: "windows.4xlarge"
-    timeout-minutes: !{{ common.timeout_minutes }}
+    defaults:
+      run:
+        working-directory: pytorch-${{ github.run_id }}
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
    env:
      JOB_BASE_NAME: !{{ build_environment }}-build
      http_proxy: "!{{ common. squid_proxy }}"
      https_proxy: "!{{ common.squid_proxy }}"
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == '!{{ ciflow_config.trigger_action }}') && (github.event.assigneed.login == '!{{ ciflow_config.trigger_actor }}') }}
-      LABEL_CONDITIONS: ${{ !{{ ciflow_config.label_conditions }} }}
-    if: !{{ ciflow_config.root_job_condition }}
    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      !{{ common.checkout_pytorch("recursive") }}
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          submodules: recursive
+          path: pytorch-${{ github.run_id }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
      !{{ common.display_ec2_information() }}
      - name: Install Visual Studio 2019 toolchain
        shell: powershell
@ -97,16 +110,25 @@ jobs:
        run: |
          .circleci/scripts/windows_cudnn_install.sh
 {%- endif %}
-      !{{ common.parse_ref() }}
      - name: Build
        shell: bash
        env:
          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
        run: |
          .jenkins/pytorch/win-build.sh
      # Upload to github so that people can click and download artifacts
+      - name: Upload artifacts to Github
+        if: always()
+        uses: actions/upload-artifact@v2
+        # Don't fail on upload to GH since it's only for user convenience
+        continue-on-error: true
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          name: ${{ env.BUILD_ENVIRONMENT }}
+          path: C:\${{ github.run_id }}\build-results
      - name: Upload artifacts to s3
+        if: always()
        uses: !{{ common.upload_artifact_s3_action }}
        with:
          retention-days: 14
@ -125,17 +147,15 @@ jobs:
          rm -rf ./*

  generate-test-matrix:
-    needs: build
+    {%- if ciflow_config.enabled %}
+    needs: [!{{ ciflow_config.root_job_name }}]
+    {%- endif %}
    runs-on: ubuntu-18.04
-    timeout-minutes: !{{ common.timeout_minutes }}
    env:
      TEST_RUNNER_TYPE: !{{ test_runner_type }}
      NUM_TEST_SHARDS: !{{ num_test_shards }}
      NUM_TEST_SHARDS_ON_PULL_REQUEST: !{{ num_test_shards_on_pull_request }}
      PR_BODY: ${{ github.event.pull_request.body }}
-      NOGPU_RUNNER_TYPE: windows.4xlarge
-      ENABLE_FORCE_ON_CPU_TEST: !{{ enable_force_on_cpu_test }}
-      RUN_SMOKE_TESTS_ONLY_ON_PR: !{{ only_run_smoke_tests_on_pull_request }}
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
@ -152,7 +172,9 @@ jobs:
        run: .github/scripts/generate_pytorch_test_matrix.py

  test:
-    timeout-minutes: !{{ common.timeout_minutes }}
+{%- if only_build_on_pull_request %}
+    if: ${{ github.event_name == 'push' }}
+{%- endif %}
    env:
      JOB_BASE_NAME: !{{ build_environment }}-test
      SHARD_NUMBER: ${{ matrix.shard }}
@ -160,31 +182,40 @@ jobs:
      TEST_CONFIG: ${{ matrix.config }}
      http_proxy: "!{{ common.squid_proxy }}"
      https_proxy: "!{{ common.squid_proxy }}"
+      RUN_SMOKE_TESTS_ONLY_ON_PR: !{{ only_run_smoke_tests_on_pull_request }}
      PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
-    needs: [build, generate-test-matrix]
+      CONTINUE_THROUGH_ERROR: ${{ github.repository == 'pytorch/pytorch' && (github.event_name == 'push' || github.event_name == 'schedule') }}
+    needs: [build, generate-test-matrix, !{{ ciflow_config.root_job_name }}]
    strategy:
      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
+    defaults:
+      run:
+        working-directory: pytorch-${{ github.run_id }}
    steps:
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          submodules: recursive
+          path: pytorch-${{ github.run_id }}
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
      !{{ common.display_ec2_information() }}
      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
        uses: seemethere/add-github-ssh-key@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      !{{ common.checkout_pytorch("recursive") }}
      - name: Install Visual Studio 2019 toolchain
        shell: powershell
        run: |
          .\.circleci\scripts\vs_install.ps1
 {%- if cuda_version != "cpu" %}
      - name: Install Cuda
-        if: ${{ matrix.config != 'force_on_cpu' }}
        shell: bash
        run: |
          .circleci/scripts/windows_cuda_install.sh
      - name: Install Cudnn
-        if: ${{ matrix.config != 'force_on_cpu' }}
        shell: bash
        run: |
          .circleci/scripts/windows_cudnn_install.sh
@ -207,11 +238,14 @@ jobs:
        shell: bash
        env:
          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
-        # Time out the test phase after 3.5 hours
-        timeout-minutes: 210
        run: |
+            if [[ $NUM_TEST_SHARDS -ne 2 ]]; then
+              export SHARD_NUMBER=0
+            fi
+            if [[ -n $GITHUB_HEAD_REF && "$RUN_SMOKE_TESTS_ONLY_ON_PR" == "true" ]]; then
+              export RUN_SMOKE_TESTS_ONLY=1
+            fi
            .jenkins/pytorch/win-test.sh
-      !{{ common.upload_downloaded_files(name='windows') }}
      !{{ common.upload_test_reports(name='windows') }}
      !{{ common.render_test_results() }}
      !{{ wait_and_kill_ssh() }}
--- a/.github/workflows/auto_label.yml
+++ b/.github/workflows/auto_label.yml
@ -0,0 +1,54 @@
+name: Label PRs & Issues
+
+on:
+  issues:
+    types: [opened, edited]
+  pull_request_target:
+    types: [edited, opened, synchronize, reopened]
+
+
+concurrency:  # one group per PR/issue, so runs for different issues never cancel each other
+  group: auto-label-${{ github.event.pull_request.number || github.event.issue.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true  # a newer event for the same PR/issue supersedes the older run
+
+
+jobs:
+  auto-label-rocm:
+    if: ${{ github.repository == 'pytorch/pytorch' }}
+    runs-on: ubuntu-18.04
+    steps:
+    - name: Retrieve information  # normalizes PR and issue events into TITLE / ISSUE_NUMBER step outputs
+      id: vars
+      env:  # event fields are handed over as env vars rather than interpolated into the script text
+        EVENT_NAME: ${{ github.event_name }}
+        PR_TITLE: ${{ github.event.pull_request.title }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+        ISSUE_TITLE: ${{ github.event.issue.title }}
+        ISSUE_NUMBER: ${{ github.event.issue.number }}
+      run: |
+        set -eux
+        if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
+          TITLE="${PR_TITLE}"
+          ISSUE_NUMBER="${PR_NUMBER}"  # for PR events the PR number stands in for the issue number
+        else
+          TITLE="${ISSUE_TITLE}"
+          # ISSUE_NUMBER is already set
+        fi
+        echo ::set-output name=TITLE::"${TITLE}"
+        echo ::set-output name=ISSUE_NUMBER::"${ISSUE_NUMBER}"
+    - name: Auto-label ROCm  # adds the "module: rocm" label when the title mentions ROCm
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        TITLE: ${{ steps.vars.outputs.TITLE }}
+        ISSUE_NUMBER: ${{ steps.vars.outputs.ISSUE_NUMBER }}
+        OWNER: ${{ github.repository_owner }}
+        REPO: ${{ github.event.repository.name }}
+      run: |
+        set -eux
+        if [[ "${TITLE,,}" == *rocm* ]]; then  # ",," lowercases the title, so the match is case-insensitive
+          curl --fail \
+            -X POST \
+            -H "Authorization: token ${GITHUB_TOKEN}" \
+            "https://api.github.com/repos/${OWNER}/${REPO}/issues/${ISSUE_NUMBER}/labels" \
+            -d '{"labels":["module: rocm"]}'  # --fail: an HTTP error from the API now fails the step instead of passing silently
+        fi
--- a/.github/workflows/build_linux_conda.yml
+++ b/.github/workflows/build_linux_conda.yml
@ -0,0 +1,115 @@
+name: Build Linux Conda Packages
+
+on:
+  # TODO: These are only runnable from workflow_dispatch, we need to eventually add
+  #       a cron
+  # TODO: Add an on_release trigger to build on tags
+  workflow_dispatch:
+
+jobs:
+  generate-build-matrix:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    runs-on: ubuntu-18.04
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    container:
+      image: python:3.9
+    steps:
+      - name: Clone pytorch/pytorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          MATRIX=$(python .github/scripts/generate_binary_build_matrix.py conda)
+          echo "${MATRIX}"
+          echo "::set-output name=matrix::${MATRIX}"
+  build-conda:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: generate-build-matrix
+    runs-on: linux.2xlarge
+    strategy:
+      matrix: ${{ fromJson(needs.generate-build-matrix.outputs.matrix) }}
+      fail-fast: false
+    container:
+      image: ${{ matrix.container_image }}
+    env:
+      DESIRED_PYTHON: ${{ matrix.python_version }}
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: ${{ matrix.gpu_arch_version }}
+      GPU_ARCH_VERSION: ${{ matrix.GPU_ARCH_VERSION }}
+      GPU_ARCH_TYPE: ${{ matrix.gpu_arch_type }}
+      NO_BUILD_SUFFIX: true
+      # TODO: This is a legacy variable, we should just default all build to use
+      #       this folder within the conda/build_pytorch.sh script
+      TORCH_CONDA_BUILD_FOLDER: pytorch-nightly
+      # TODO: Another legacy env variable that isn't useful anymore, should default
+      #       to pytorch within the scripts directly
+      ANACONDA_USER: pytorch
+      PYTORCH_FINAL_PACKAGE_DIR: /remote
+      # We specify the CONDA_BLD_PATH here since conda creates extremely long paths
+      # for its default build path
+      CONDA_BLD_PATH: /build
+      PYTORCH_BUILD_NUMBER: 1
+      SKIP_ALL_TESTS: 1
+    steps:
+      - name: Clean runner workspace
+        run: rm -rf "$GITHUB_WORKSPACE"
+      - name: Clone pytorch/pytorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Generate version string
+        working-directory: pytorch/
+        run: |
+          version=$(.github/scripts/generate_pytorch_version.py)
+          echo "Generated version: ${version}"
+          echo "PYTORCH_BUILD_VERSION=${version}" >> "$GITHUB_ENV"
+      - name: Set BUILD_SPLIT_CUDA
+        if: ${{ matrix.gpu_arch_type == 'cuda' && matrix.gpu_arch_version == '11.1' }}
+        run: |
+          echo "BUILD_SPLIT_CUDA=1" >> "$GITHUB_ENV"
+      # TODO: Remove this once we remove the need for the directories to be
+      #       in specific locations
+      - name: Symlink repositories to root directory (for legacy scripts purposes)
+        run: |
+          mv "$PWD"/pytorch /pytorch  # NOTE: this moves the checkout rather than symlinking it, despite the step name
+          mv "$PWD"/builder /builder  # after these two commands the working directory no longer contains either repo
+      # TODO: Bundle the correct build script in the base container image so
+      #       that we don't have to do this type of specification
+      - name: Build PyTorch binary
+        run: |
+          /builder/conda/build_pytorch.sh
+      - uses: actions/upload-artifact@v2
+        with:
+          name: pytorch-conda-py${{ matrix.python_version }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
+          path: /remote/**/*.bz2
+      - name: Parse ref
+        id: parse-ref
+        run: /pytorch/.github/scripts/parse_ref.py  # the checkout was cloned into pytorch/ and then moved to /pytorch, so a relative .github/ path does not resolve
+      - name: Display and upload binary build size statistics (Click Me)
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
+        run: |
+          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
+          export COMMIT_TIME
+          pip3 install requests==2.26
+          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
+
+concurrency:
+  group: build-linux-conda-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
--- a/.github/workflows/build_linux_libtorch.yml
+++ b/.github/workflows/build_linux_libtorch.yml
@ -0,0 +1,114 @@
+name: Build Linux libtorch
+
+on:
+  # TODO: These are only runnable from workflow_dispatch, we need to eventually add
+  #       a cron
+  # TODO: Add an on_release trigger to build on tags
+  workflow_dispatch:
+
+jobs:
+  generate-build-matrix:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    runs-on: ubuntu-18.04
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    container:
+      image: python:3.9
+    steps:
+      - name: Clone pytorch/pytorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          MATRIX=$(python .github/scripts/generate_binary_build_matrix.py libtorch)
+          echo "${MATRIX}"
+          echo "::set-output name=matrix::${MATRIX}"
+  build-libtorch:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: generate-build-matrix
+    runs-on: linux.2xlarge
+    strategy:
+      matrix: ${{ fromJson(needs.generate-build-matrix.outputs.matrix) }}
+      fail-fast: false
+    container:
+      image: ${{ matrix.container_image }}
+    env:
+      # TODO: remove this var from the libtorch builder script(s)
+      DESIRED_PYTHON: '3.7'
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: ${{ matrix.gpu_arch_version }}
+      GPU_ARCH_VERSION: ${{ matrix.GPU_ARCH_VERSION }}
+      GPU_ARCH_TYPE: ${{ matrix.gpu_arch_type }}
+      BUILD_PYTHONLESS: 1
+      LIBTORCH_VARIANT: ${{ matrix.libtorch_variant }}
+      # TODO: remove this and bake env var into the Docker image
+      DESIRED_DEVTOOLSET: ${{ matrix.devtoolset }}
+      PYTORCH_BUILD_NUMBER: 1
+      SKIP_ALL_TESTS: 1
+    steps:
+      - name: Clean runner workspace
+        run: rm -rf "$GITHUB_WORKSPACE"
+      - name: Clone pytorch/pytorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Generate version string
+        working-directory: pytorch/
+        run: |
+          version=$(.github/scripts/generate_pytorch_version.py)
+          echo "Generated version: ${version}"
+          echo "PYTORCH_BUILD_VERSION=${version}" >> "$GITHUB_ENV"
+      - name: Set BUILD_SPLIT_CUDA
+        if: ${{ matrix.gpu_arch_type == 'cuda' && matrix.gpu_arch_version == '11.1' }}
+        run: |
+          echo "BUILD_SPLIT_CUDA=1" >> "$GITHUB_ENV"
+      # TODO: Remove this once we remove the need for the directories to be
+      #       in specific locations
+      - name: Symlink repositories to root directory (for legacy scripts purposes)
+        run: |
+          ln -s "$PWD"/pytorch /pytorch
+          ln -s "$PWD"/builder /builder
+      # TODO: Bundle the correct build script in the base container image so
+      #       that we don't have to do this type of specification
+      - name: Build PyTorch binary (CUDA specific)
+        if: ${{ matrix.gpu_arch_type == 'cuda' }}
+        run: |
+          /builder/manywheel/build.sh
+      - name: Build PyTorch binary (CPU specific)
+        if: ${{ matrix.gpu_arch_type == 'cpu' }}
+        run: |
+          /builder/manywheel/build_cpu.sh
+      - uses: actions/upload-artifact@v2
+        with:
+          name: pytorch-libtorch-${{ matrix.libtorch_variant }}-${{ matrix.devtoolset }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
+          path: /remote/**/*.zip
+      - name: Parse ref
+        id: parse-ref
+        run: /pytorch/.github/scripts/parse_ref.py  # the checkout lives under pytorch/ (symlinked at /pytorch), so a relative .github/ path does not resolve
+      - name: Display and upload binary build size statistics (Click Me)
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
+        run: |
+          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
+          export COMMIT_TIME
+          pip3 install requests==2.26
+          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
+
+concurrency:
+  group: build-linux-libtorch-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
--- a/.github/workflows/build_linux_wheels.yml
+++ b/.github/workflows/build_linux_wheels.yml
@ -0,0 +1,122 @
+# Builds PyTorch Linux wheels for every configuration emitted by
+# .github/scripts/generate_binary_build_matrix.py and uploads them as artifacts.
+name: Build Linux Wheels
+
+on:
+  # TODO: These are only runnable from workflow_dispatch, we need to eventually add
+  #       a cron
+  # TODO: Add an on_release trigger to build on tags
+  workflow_dispatch:
+
+jobs:
+  # Computes the build matrix once and exposes it as a JSON string job output.
+  generate-build-matrix:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    runs-on: ubuntu-18.04
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    container:
+      image: python:3.9
+    steps:
+      - name: Clone pytorch/pytorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          MATRIX=$(python .github/scripts/generate_binary_build_matrix.py wheels)
+          echo "${MATRIX}"
+          echo "::set-output name=matrix::${MATRIX}"
+  # Builds one wheel per matrix entry inside the container image the matrix names.
+  build-wheel:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    needs: generate-build-matrix
+    runs-on: linux.2xlarge
+    strategy:
+      matrix: ${{ fromJson(needs.generate-build-matrix.outputs.matrix) }}
+      fail-fast: false
+    container:
+      image: ${{ matrix.container_image }}
+    env:
+      DESIRED_PYTHON: ${{ matrix.python_version }}
+      # TODO: This is a legacy variable that we eventually want to get rid of in
+      #       favor of GPU_ARCH_VERSION
+      DESIRED_CUDA: ${{ matrix.gpu_arch_version }}
+      GPU_ARCH_VERSION: ${{ matrix.gpu_arch_version }}
+      GPU_ARCH_TYPE: ${{ matrix.gpu_arch_type }}
+      PYTORCH_BUILD_NUMBER: 1
+      SKIP_ALL_TESTS: 1
+    steps:
+      - name: Clean runner workspace
+        run: rm -rf "$GITHUB_WORKSPACE"
+      - name: Clone pytorch/pytorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          path: pytorch
+          submodules: recursive
+      - name: Clone pytorch/builder
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          repository: pytorch/builder
+          path: builder
+      - name: Generate version string
+        working-directory: pytorch/
+        run: |
+          version=$(.github/scripts/generate_pytorch_version.py)
+          echo "Generated version: ${version}"
+          echo "PYTORCH_BUILD_VERSION=${version}" >> "$GITHUB_ENV"
+      - name: Set BUILD_SPLIT_CUDA
+        if: ${{ matrix.gpu_arch_type == 'cuda' && matrix.gpu_arch_version == '11.1' }}
+        run: |
+          echo "BUILD_SPLIT_CUDA=1" >> "$GITHUB_ENV"
+      # TODO: Remove this once we remove the need for the directories to be
+      #       in specific locations
+      - name: Symlink repositories to root directory (for legacy scripts purposes)
+        run: |
+          ln -s "$PWD"/pytorch /pytorch
+          ln -s "$PWD"/builder /builder
+      # TODO: Bundle the correct build script in the base container image so
+      #       that we don't have to do this type of specification
+      - name: Build PyTorch binary (CUDA specific)
+        if: ${{ matrix.gpu_arch_type == 'cuda' }}
+        run: |
+          /builder/manywheel/build.sh
+      - name: Build PyTorch binary (ROCM specific)
+        if: ${{ matrix.gpu_arch_type == 'rocm' }}
+        run: |
+          /builder/manywheel/build_rocm.sh
+      - name: Build PyTorch binary (CPU specific)
+        if: ${{ matrix.gpu_arch_type == 'cpu' }}
+        run: |
+          /builder/manywheel/build_cpu.sh
+      - uses: actions/upload-artifact@v2
+        with:
+          name: pytorch-wheel-py${{ matrix.python_version }}-${{matrix.gpu_arch_type}}-${{ matrix.gpu_arch_version }}
+          path: /remote/**/*.whl
+      # The repository is checked out under pytorch/ (see "Clone pytorch/pytorch"),
+      # so steps that use repo-relative paths must run from that directory.
+      - name: Parse ref
+        id: parse-ref
+        working-directory: pytorch/
+        run: .github/scripts/parse_ref.py
+      - name: Display and upload binary build size statistics (Click Me)
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        working-directory: pytorch/
+        env:
+          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
+        run: |
+          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
+          export COMMIT_TIME
+          pip3 install requests==2.26
+          # best-effort: a failed stats upload must not fail the build
+          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
+
+concurrency:
+  group: build-linux-wheels-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
--- a/.github/workflows/generated-caffe2-linux-xenial-py3.7-gcc5.4.yml
+++ b/.github/workflows/generated-caffe2-linux-xenial-py3.7-gcc5.4.yml
@ -1,253 +0,0 @@
-# @generated DO NOT EDIT MANUALLY
-# Template is at:    .github/templates/linux_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: caffe2-linux-xenial-py3.7-gcc5.4
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, unassigned]
-  push:
-    branches:
-      - master
-      - release/*
-  workflow_dispatch:
-
-env:
-  BUILD_ENVIRONMENT: caffe2-linux-xenial-py3.7-gcc5.4
-  DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4
-  SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
-  XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
-  TORCH_CUDA_ARCH_LIST: 5.2
-  IN_CI: 1
-  IS_GHA: 1
-  # This is used for the phase of adding wheel tests only, will be removed once completed
-  IN_WHEEL_TEST: 1
-  # Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
-  CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-  AWS_DEFAULT_REGION: us-east-1
-  PR_NUMBER: ${{ github.event.pull_request.number }}
-  SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-  PYTORCH_RETRY_TEST_CASES: 1
-concurrency:
-  group: caffe2-linux-xenial-py3.7-gcc5.4-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
-  cancel-in-progress: true
-
-jobs:
-
-  build:
-    runs-on: linux.2xlarge
-    timeout-minutes: 240
-    if: ${{ (github.repository == 'pytorch/pytorch') && (
-            (github.event_name == 'push') ||
-            (github.event_name == 'schedule') ||
-            (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk')) ||
-            (false))
-         }}
-    env:
-      JOB_BASE_NAME: caffe2-linux-xenial-py3.7-gcc5.4-build
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assigneed.login == 'pytorchbot') }}
-      LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk') }}
-    outputs:
-      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      - name: Display EC2 information
-        shell: bash
-        run: |
-          set -euo pipefail
-          function get_ec2_metadata() {
-            # Pulled from instance metadata endpoint for EC2
-            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
-            category=$1
-            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
-          }
-          echo "ami-id: $(get_ec2_metadata ami-id)"
-          echo "instance-id: $(get_ec2_metadata instance-id)"
-          echo "instance-type: $(get_ec2_metadata instance-type)"
-      - name: Log in to ECR
-        env:
-          AWS_RETRY_MODE: standard
-          AWS_MAX_ATTEMPTS: 5
-        run: |
-          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
-              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
-      - name: Chown workspace
-        run: |
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-          retry docker pull "${ALPINE_IMAGE}"
-          # Ensure the working directory gets chowned back to the current user
-          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      - name: Clean workspace
-        run: |
-          rm -rf "${GITHUB_WORKSPACE}"
-          mkdir "${GITHUB_WORKSPACE}"
-      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
-        uses: seemethere/add-github-ssh-key@v1
-        with:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: Preserve github env variables for use in docker
-        run: |
-          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
-      - name: Checkout PyTorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-        with:
-          # deep clone, to allow use of git merge-base
-          fetch-depth: 0
-          submodules: recursive
-      - name: Clean PyTorch checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-      - name: Calculate docker image tag
-        id: calculate-tag
-        run: |
-          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
-          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "::set-output name=docker_tag::${DOCKER_TAG}"
-          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
-      - name: Check if image should be built
-        id: check
-        env:
-          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
-        run: |
-          set -x
-          # Check if image already exists, if it does then skip building it
-          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
-            exit 0
-          fi
-          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
-            # if we're on the base branch then use the parent commit
-            MERGE_BASE=$(git rev-parse HEAD~)
-          else
-            # otherwise we're on a PR, so use the most recent base commit
-            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
-          fi
-          # Covers the case where a previous tag doesn't exist for the tree
-          # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
-          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
-            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
-            exit 1
-          fi
-          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
-          # If no image exists but the hash is the same as the previous hash then we should error out here
-          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
-            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
-            echo "       contact the PyTorch team to restore the original images"
-            exit 1
-          fi
-          echo ::set-output name=rebuild::yes
-      - name: Build and push docker image
-        if: ${{ steps.check.outputs.rebuild }}
-        env:
-          DOCKER_SKIP_S3_UPLOAD: 1
-        working-directory: .circleci/docker
-        run: |
-          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
-          ./build_docker.sh
-      - name: Pull Docker image
-        run: |
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Parse ref
-        id: parse-ref
-        run: .github/scripts/parse_ref.py
-      - name: Build
-        env:
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-        run: |
-          # detached container should get cleaned up by teardown_ec2_linux
-          container_name=$(docker run \
-            -e BUILD_ENVIRONMENT \
-            -e JOB_BASE_NAME \
-            -e MAX_JOBS="$(nproc --ignore=2)" \
-            -e AWS_DEFAULT_REGION \
-            -e IS_GHA \
-            -e PR_NUMBER \
-            -e SHA1 \
-            -e BRANCH \
-            -e GITHUB_RUN_ID \
-            -e SCCACHE_BUCKET \
-            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
-            -e SKIP_SCCACHE_INITIALIZATION=1 \
-            -e TORCH_CUDA_ARCH_LIST \
-            -e PR_LABELS \
-            -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
-            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
-            --security-opt seccomp=unconfined \
-            --cap-add=SYS_PTRACE \
-            --tty \
-            --detach \
-            --user jenkins \
-            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-            -w /var/lib/jenkins/workspace \
-            "${DOCKER_IMAGE}"
-          )
-          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
-      - name: Display and upload binary build size statistics (Click Me)
-        # temporary hack: set CIRCLE_* vars, until we update
-        # tools/stats/print_test_stats.py to natively support GitHub Actions
-        env:
-          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: '${{ github.run_id }}'
-        run: |
-          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
-          export COMMIT_TIME
-          pip3 install requests==2.26 boto3==1.16.34
-          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
-      - name: Chown workspace
-        run: |
-          # Ensure the working directory gets chowned back to the current user
-          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      - name: Archive artifacts into zip
-        run: |
-          zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
-      - uses: seemethere/upload-artifact-s3@v3
-        name: Store PyTorch Build Artifacts on S3
-        with:
-          name: ${{ env.BUILD_ENVIRONMENT }}
-          retention-days: 14
-          if-no-files-found: error
-          path:
-            artifacts.zip
-      - name: Hold runner for 2 hours or until ssh sessions have drained
-        # Always hold for active ssh sessions
-        if: always()
-        run: .github/scripts/wait_for_ssh_to_drain.sh
-      - name: Chown workspace
-        if: always()
-        run: |
-          # Ensure the working directory gets chowned back to the current user
-          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      - name: Kill containers, clean up images
-        if: always()
-        run: |
-          # ignore expansion of "docker ps -q" since it could be empty
-          # shellcheck disable=SC2046
-          docker stop $(docker ps -q) || true
-          # Prune all of the docker images
-          docker system prune -af
-      - name: Hold runner for 2 hours or until ssh sessions have drained
-        # Always hold for active ssh sessions
-        if: always()
-        run: .github/scripts/wait_for_ssh_to_drain.sh
-      - name: Clean up docker images
-        if: always()
-        run: |
-          # Prune all of the docker images
-          docker system prune -af
--- a/.github/workflows/generated-docker-builds.yml
+++ b/.github/workflows/generated-docker-builds.yml
@ -1,175 +0,0 @@
-# @generated DO NOT EDIT MANUALLY
-# Template is at:    .github/templates/docker_builds_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: docker-builds
-
-on:
-  workflow_dispatch:
-  pull_request:
-    types: [opened, synchronize, reopened]
-    paths:
-      - '.circleci/docker/**'
-      - '.github/workflows/generated-docker-builds.yml'
-  schedule:
-    - cron: 1 3 * * 3
-concurrency:
-  group: docker-builds-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
-  cancel-in-progress: true
-
-env:
-  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
-  AWS_DEFAULT_REGION: us-east-1
-
-jobs:
-
-  docker-build:
-    runs-on: linux.2xlarge
-    timeout-minutes: 240
-    strategy:
-      matrix:
-        include:
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.7-clang9'
-              docker_image_short_name: 'pytorch-linux-bionic-cuda10.2-cudnn7-py3.7-clang9'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7'
-              docker_image_short_name: 'pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7'
-              docker_image_short_name: 'pytorch-linux-bionic-cuda11.5-cudnn8-py3-gcc7'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.7-clang9'
-              docker_image_short_name: 'pytorch-linux-bionic-py3.7-clang9'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-rocm4.3.1-py3.7'
-              docker_image_short_name: 'pytorch-linux-bionic-rocm4.3.1-py3.7'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-rocm4.5-py3.7'
-              docker_image_short_name: 'pytorch-linux-bionic-rocm4.5-py3.7'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7'
-              docker_image_short_name: 'pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7'
-              docker_image_short_name: 'pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7'
-              docker_image_short_name: 'pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c'
-              docker_image_short_name: 'pytorch-linux-xenial-py3-clang5-android-ndk-r19c'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-asan'
-              docker_image_short_name: 'pytorch-linux-xenial-py3-clang5-asan'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-asan'
-              docker_image_short_name: 'pytorch-linux-xenial-py3-clang7-asan'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-onnx'
-              docker_image_short_name: 'pytorch-linux-xenial-py3-clang7-onnx'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc5.4'
-              docker_image_short_name: 'pytorch-linux-xenial-py3.7-gcc5.4'
-            - docker_image_base: '308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.7-gcc7'
-              docker_image_short_name: 'pytorch-linux-xenial-py3.7-gcc7'
-    env:
-      DOCKER_IMAGE_BASE: '${{ matrix.docker_image_base }}'
-    name: docker-build (${{ matrix.docker_image_short_name }})
-    steps:
-      - name: Display EC2 information
-        shell: bash
-        run: |
-          set -euo pipefail
-          function get_ec2_metadata() {
-            # Pulled from instance metadata endpoint for EC2
-            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
-            category=$1
-            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
-          }
-          echo "ami-id: $(get_ec2_metadata ami-id)"
-          echo "instance-id: $(get_ec2_metadata instance-id)"
-          echo "instance-type: $(get_ec2_metadata instance-type)"
-      - name: Log in to ECR
-        env:
-          AWS_RETRY_MODE: standard
-          AWS_MAX_ATTEMPTS: 5
-        run: |
-          AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-          retry aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | docker login --username AWS \
-              --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com"
-      - name: Chown workspace
-        run: |
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-          retry docker pull "${ALPINE_IMAGE}"
-          # Ensure the working directory gets chowned back to the current user
-          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      - name: Clean workspace
-        run: |
-          rm -rf "${GITHUB_WORKSPACE}"
-          mkdir "${GITHUB_WORKSPACE}"
-      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
-        uses: seemethere/add-github-ssh-key@v1
-        with:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: Preserve github env variables for use in docker
-        run: |
-          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
-      - name: Checkout PyTorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-        with:
-          # deep clone, to allow use of git merge-base
-          fetch-depth: 0
-          submodules: recursive
-      - name: Clean PyTorch checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-      - name: Calculate docker image tag
-        id: calculate-tag
-        run: |
-          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
-          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
-          echo "::set-output name=docker_tag::${DOCKER_TAG}"
-          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
-      - name: Check if image should be built
-        id: check
-        env:
-          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
-        run: |
-          set -x
-          echo ::set-output name=rebuild::yes
-      - name: Build and push docker image
-        if: ${{ steps.check.outputs.rebuild }}
-        env:
-          DOCKER_SKIP_S3_UPLOAD: 1
-        working-directory: .circleci/docker
-        run: |
-          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
-          ./build_docker.sh
-      - name: Pull Docker image
-        run: |
-          retry () {
-              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
-          }
-          retry docker pull "${DOCKER_IMAGE}"
-      - name: Parse ref
-        id: parse-ref
-        run: .github/scripts/parse_ref.py
-      - name: Hold runner for 2 hours or until ssh sessions have drained
-        # Always hold for active ssh sessions
-        if: always()
-        run: .github/scripts/wait_for_ssh_to_drain.sh
-      - name: Chown workspace
-        if: always()
-        run: |
-          # Ensure the working directory gets chowned back to the current user
-          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
-      - name: Kill containers, clean up images
-        if: always()
-        run: |
-          # ignore expansion of "docker ps -q" since it could be empty
-          # shellcheck disable=SC2046
-          docker stop $(docker ps -q) || true
-          # Prune all of the docker images
-          docker system prune -af
-      - name: Hold runner for 2 hours or until ssh sessions have drained
-        # Always hold for active ssh sessions
-        if: always()
-        run: .github/scripts/wait_for_ssh_to_drain.sh
-      - name: Clean up docker images
-        if: always()
-        run: |
-          # Prune all of the docker images
-          docker system prune -af
--- a/.github/workflows/generated-ios-12-5-1-arm64-coreml.yml
+++ b/.github/workflows/generated-ios-12-5-1-arm64-coreml.yml
@ -1,98 +0,0 @@
-# @generated DO NOT EDIT MANUALLY
-# Template is at:    .github/templates/ios_ci_workflow.yml.j2
-# Generation script: .github/scripts/generate_ci_workflows.py
-name: ios-12-5-1-arm64-coreml
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, unassigned]
-  push:
-    branches:
-      - master
-      - release/*
-  workflow_dispatch:
-
-# For setup-miniconda, see https://github.com/conda-incubator/setup-miniconda/issues/179
-defaults:
-  run:
-    shell: bash -x -e -l {0}
-env:
-  BUILD_ENVIRONMENT: ios-12-5-1-arm64-coreml
-  IN_CI: 1
-  IS_GHA: 1
-
-
-jobs:
-
-  build:
-    runs-on: macos-10.15
-    timeout-minutes: 240
-    env:
-      JOB_BASE_NAME: ios-12-5-1-arm64-coreml-build
-      IOS_CERT_KEY_2022: ${{ secrets.IOS_CERT_KEY_2022 }}
-      IOS_SIGN_KEY_2022: ${{ secrets.IOS_SIGN_KEY_2022 }}
-      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assigneed.login == 'pytorchbot') }}
-      LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/ios') || contains(github.event.pull_request.labels.*.name, 'ciflow/macos') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk') }}
-      PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
-    if: ${{ (github.repository == 'pytorch/pytorch') && (
-            (github.event_name == 'push') ||
-            (github.event_name == 'schedule') ||
-            (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/ios') || contains(github.event.pull_request.labels.*.name, 'ciflow/macos') || contains(github.event.pull_request.labels.*.name, 'ciflow/trunk')) ||
-            (false))
-         }}
-    steps:
-      - name: print labels
-        run: echo "${PR_LABELS}"
-      - name: Checkout PyTorch
-        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
-        with:
-          # deep clone, to allow use of git merge-base
-          fetch-depth: 0
-          submodules: recursive
-      - name: Clean PyTorch checkout
-        run: |
-          # Remove any artifacts from the previous checkouts
-          git clean -fxd
-      - name: Setup miniconda
-        uses: conda-incubator/setup-miniconda@v2
-        with:
-          auto-update-conda: true
-          python-version: 3.8
-          activate-environment: build
-      - name: Install ios / conda Dependencies
-        run: |
-          # Install dependencies
-          brew install libtool
-          conda install numpy ninja pyyaml mkl mkl-include setuptools cmake cffi requests typing_extensions --yes
-      - name: Run Fastlane
-        shell: bash -e {0}
-        run: |
-          set -x
-          cd ios/TestApp
-          # install fastlane
-          sudo gem install bundler && bundle install
-          # install certificates
-          echo "${IOS_CERT_KEY_2022}" >> cert.txt
-          base64 --decode cert.txt -o Certificates.p12
-          rm cert.txt
-          bundle exec fastlane install_root_cert
-          bundle exec fastlane install_dev_cert
-          # install the provisioning profile
-          PROFILE=PyTorch_CI_2022.mobileprovision
-          PROVISIONING_PROFILES=~/Library/MobileDevice/Provisioning\ Profiles
-          mkdir -pv "${PROVISIONING_PROFILES}"
-          cd "${PROVISIONING_PROFILES}"
-          echo "${IOS_SIGN_KEY_2022}" >> cert.txt
-          base64 --decode cert.txt -o ${PROFILE}
-          rm cert.txt
-      - name: Build
-        run: |
-          export TCLLIBPATH="/usr/local/lib"
-          python -VV
-          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname "$(which conda)")/../"}
-          scripts/build_ios.sh
-
-
-concurrency:
-  group: ios-12-5-1-arm64-coreml-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
-  cancel-in-progress: true
--- a/Show More
+++ b/Show More