Perform appropriate CUDA stream synchronization in distributed autograd. (#53929) (#54358)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/53929 The local autograd engine performs appropriate stream synchronization between autograd nodes in the graph to ensure a consumer's stream is synchronized with the producer's stream before executing the consumer. However in case of distributed autograd, the SendRpcBackward function receives gradients over the wire and TensorPipe uses its own pool of streams for this purpose. As a result, the tensors are received on TensorPipe's stream pool but SendRpcBackward runs on a different stream during the backward pass and there is no logic to synchronize these streams. To fix this, I've enhanced DistEngine to synchronize these streams appropriately when it receives grads over the wire. ghstack-source-id: 124055277 (Note: this ignores all push blocking failures!) Test Plan: 1) Added unit test which reproduced the issue. 2) waitforbuildbot. Reviewed By: walterddr, wanchaol Differential Revision: D27025307 fbshipit-source-id: 2944854e688e001cb3989d2741727b30d9278414 Co-authored-by: Pritam Damania <pritam.damania@fb.com>
[CI] Install compatible cmath for Win builds (#54556)
2025-11-01 13:34:57 +08:00 · 2021-03-23 19:28:21 -07:00 · 2021-03-23 19:02:01 -07:00 · 2021-03-23 15:56:26 -07:00 · 2021-03-23 15:45:20 -07:00 · 2021-03-23 11:23:02 -07:00
5418 changed files with 196079 additions and 483796 deletions
--- a/.azure_pipelines/build-pipeline.yml
+++ b/.azure_pipelines/build-pipeline.yml
@ -1,63 +0,0 @@
-# PyTorch CI Builds Pipeline on Azure DevOps
-#
-# This pipeline:
-#   1) builds PyTorch on select configurations
-#   2) runs only TestTorch unit tests.
-
-stages:
- stage: 'Build'
-  displayName: 'Build PyTorch'
-  jobs:
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_CPU_docker
-      pool: 'PyTorch-Linux-CPU'
-      container_endpoint: pytorchms.azurecr.io
-      build_stage: True
-      is_ci_build: True
-      os: ubuntu
-      cuda: cpu
-      customMatrixes:
-        Py_38:
-          configuration: ubuntu_1804_py_38_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
-
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_GPU_docker
-      pool: 'PyTorch-Linux-GPU'
-      container_endpoint: pytorchms.azurecr.io
-      build_stage: True
-      is_ci_build: True
-      os: ubuntu
-      cuda: gpu
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
-          CUDA_VERSION: 112
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_CPU
-      pool: 'PyTorch-Win-CPU'
-      build_stage: True
-      is_ci_build: True
-      os: windows
-      cuda: cpu
-      customMatrixes:
-        Py_37:
-          configuration: windows_2019_py_37_cpu
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_GPU
-      pool: 'PyTorch-Win-GPU'
-      build_stage: True
-      is_ci_build: True
-      os: windows
-      cuda: gpu
-      customMatrixes:
-        Py_38_CUDA_102_cuDNN_765:
-          configuration: windows_2019_py_38_cuda_102_cudnn_765
-          CUDA_VERSION: 102
--- a/.azure_pipelines/daily-pipeline.yml
+++ b/.azure_pipelines/daily-pipeline.yml
@ -1,82 +0,0 @@
-# PyTorch Daily Builds Pipeline on Azure DevOps
-#
-# This pipeline:
-#   1) builds PyTorch on all available configurations
-#   2) runs all PyTorch unit tests
-
-stages:
- stage: 'BuildTest'
-  displayName: 'Build and Test PyTorch'
-  jobs:
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_CPU_docker
-      pool: 'PyTorch-Linux-CPU'
-      container_endpoint: pytorchms.azurecr.io
-      build_stage: True
-      is_daily_build: True
-      os: ubuntu
-      cuda: cpu
-      customMatrixes:
-        Py_38:
-          configuration: ubuntu_1804_py_38_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
-        Py_37:
-          configuration: ubuntu_1804_py_37_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
-
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_GPU_docker
-      pool: 'PyTorch-Linux-GPU'
-      container_endpoint: pytorchms.azurecr.io
-      build_stage: True
-      is_daily_build: True
-      os: ubuntu
-      cuda: gpu
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_810:
-          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_765:
-          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
-          CUDA_VERSION: 101
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_CPU
-      pool: 'PyTorch-Win-CPU'
-      build_stage: True
-      is_daily_build: True
-      os: windows
-      cuda: cpu
-      customMatrixes:
-        Py_38:
-          configuration: windows_2019_py_38_cpu
-        Py_37:
-          configuration: windows_2019_py_37_cpu
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_GPU
-      pool: 'PyTorch-Win-GPU'
-      build_stage: True
-      is_daily_build: True
-      os: windows
-      cuda: gpu
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: windows_2019_py_39_cuda_112_cudnn_810
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_765:
-          configuration: windows_2019_py_38_cuda_102_cudnn_765
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_764:
-          configuration: windows_2019_py_37_cuda_101_cudnn_764
-          CUDA_VERSION: 101
--- a/.azure_pipelines/job_templates/build-verify-publish-template-unix.yml
+++ b/.azure_pipelines/job_templates/build-verify-publish-template-unix.yml
@ -1,134 +0,0 @@
-# PyTorch build steps template with Unix images Azure DevOps Instances
-#
-# This build depends on 3 parameters set as environment variables in the pipeline:
-#   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
-#   - AZURE_DEVOPS_ARTIFACTS_ORGANIZATION: Azure Artifacts Organization name to publish artifacts
-#   - AZURE_DEVOPS_ARTIFACTS_PROJECT: Azure Artifacts Project name to publish artifacts
-
-parameters:
-  name: ''
-  pool: ''
-  container_endpoint: ''
-  os: ''
-  cuda: ''
-  is_ci_build: False
-  is_official_build: False
-  is_daily_build: False
-  build_stage: False
-  verify_stage: False
-  publish_stage: False
-  customMatrixes: ''
-
-jobs:
- job: ${{parameters.name}}
-  timeoutInMinutes: 300
-  strategy:
-    matrix:
-      ${{ insert }}: ${{parameters.customMatrixes}}
-  pool:
-    name: ${{ parameters.pool}}
-  variables:
-    DECODE_PERCENTS: false
-  container:
-    image: $[variables['container_image']]
-    endpoint: ${{parameters.container_endpoint}}
-
-  steps:
-  # Build stage
-  - ${{ if eq(parameters.build_stage, 'True') }}:
-    # Set up environment variables for specific pipeline build
-    - template: set-environment-variables.yml
-      parameters:
-        os: ${{ parameters.os}}
-        cuda: ${{ parameters.cuda}}
-        is_official_build: ${{ parameters.is_official_build}}
-
-    # Sync and update PyTorch submodules
-    - bash: git submodule update --init --recursive --jobs 0
-      displayName: Update PyTorch submodules
-
-    # Build PyTorch and run unit tests - no packaging
-    - ${{ if or(eq(parameters.is_ci_build, 'True'), eq(parameters.is_daily_build, 'True')) }}:
-      # Build PyTorch from source in develop mode
-      - bash: python setup.py develop
-        displayName: Build PyTorch from source
-
-      - ${{ if eq(parameters.is_ci_build, 'True') }}:
-        # Run TestTorch unit tests to demonstrate successful PyTorch build
-        - bash: python test/test_torch.py TestTorch
-          displayName: Run TestTorch unit tests
-
-      - ${{ if eq(parameters.is_daily_build, 'True') }}:
-        # Run all unit tests to demonstrate successful PyTorch build
-        - bash: python test/run_test.py --continue-through-error --exclude-jit-executor --verbose
-          displayName: Run all unit tests
-
-      # Run ComponentGovernance
-      - task: ComponentGovernanceComponentDetection@0
-        inputs:
-          scanType: 'Register'
-          verbosity: 'Verbose'
-          alertWarningLevel: 'High'
-
-    # Build PyTorch and produce artifacts for verification stage
-    - ${{ if eq(parameters.is_official_build, 'True') }}:
-      # Build PyTorch from source in install mode and exclude test binaries
-      - bash: python setup.py install
-        displayName: Build PyTorch from source without test binaries
-
-      # Package PyTorch Wheel
-      - bash: python setup.py bdist_wheel
-        displayName: Package PyTorch Wheel
-
-      # Publish PyTorch Wheel
-      - task: PublishPipelineArtifact@1
-        inputs:
-          targetPath: $(Build.SourcesDirectory)/dist/
-          artifactName: Build_$(Build.BuildNumber)_$(configuration)
-        displayName: Publish PyTorch Wheel to Pipeline Artifacts
-
-  # Verification stage
-  - ${{ if eq(parameters.verify_stage, 'True') }}:
-    # Download PyTorch Wheel
-    - task: DownloadPipelineArtifact@2
-      inputs:
-        artifact: Build_$(Build.BuildNumber)_$(configuration)
-        path: $(Build.SourcesDirectory)/verify
-      displayName: Download PyTorch Wheel
-
-    # Install PyTorch Wheel on Linux
-    - bash: python -m pip install $(Build.SourcesDirectory)/verify/torch*linux*.whl
-      displayName: Install PyTorch Wheel
-
-    # Ensure PyTorch installed correctly from produced wheel
-    - bash: |
-        cd $(Build.SourcesDirectory)/verify
-        python -c "import torch; print('Installed Torch version: ' + torch.__version__)"
-      displayName: Check PyTorch correctly installed from wheel
-
-  # Publishing stage
-  - ${{ if eq(parameters.publish_stage, 'True') }}:
-    # Download PyTorch Wheel
-    - task: DownloadPipelineArtifact@2
-      inputs:
-        artifact: Build_$(Build.BuildNumber)_$(configuration)
-        path: $(Build.SourcesDirectory)/publish
-      displayName: Download PyTorch Wheel
-
-    # Publish wheel to Azure Artifacts
-    # The flag continueOnError=true is needed as the artifact to be published
-    # may already exist, because the artifact is differentiated based on the
-    # last commit date.
-    - bash: |
-        export TORCH_VERSION=$(head -c 5 ./version.txt)
-        export LAST_COMMIT=$(git rev-parse --short HEAD)
-        export LAST_COMMIT_DATE=$(git log -1 --pretty=%ad --date=format:%Y%m%d)
-        cd $(Build.SourcesDirectory)/publish
-        export TORCH_WHEEL=$(echo torch*linux*whl)
-        az extension add -n azure-devops
-        echo $ADOTOKEN | az devops login
-        az artifacts universal publish --organization $AZURE_DEVOPS_ARTIFACTS_ORGANIZATION --project $AZURE_DEVOPS_ARTIFACTS_PROJECT --scope project --feed "PyTorch" --name $TORCH_WHEEL --description "PyTorch Official Build Artifact" --version $TORCH_VERSION-$LAST_COMMIT_DATE-$LAST_COMMIT --path .
-      env:
-        ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
-      continueOnError: true
-      displayName: Upload PyTorch Official Build package to Azure Artifacts
--- a/.azure_pipelines/job_templates/build-verify-publish-template-win.yml
+++ b/.azure_pipelines/job_templates/build-verify-publish-template-win.yml
@ -1,150 +0,0 @@
-# PyTorch build steps template with Windows images Azure DevOps Instances
-#
-# This build depends on 3 parameters set as environment variables in the pipeline:
-#   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
-#   - AZURE_DEVOPS_ARTIFACTS_ORGANIZATION: Azure Artifacts Organization name to publish artifacts
-#   - AZURE_DEVOPS_ARTIFACTS_PROJECT: Azure Artifacts Project name to publish artifacts
-
-parameters:
-  name: ''
-  pool: ''
-  os: ''
-  cuda: ''
-  is_ci_build: False
-  is_official_build: False
-  is_daily_build: False
-  build_stage: False
-  verify_stage: False
-  publish_stage: False
-  customMatrixes: ''
-
-jobs:
- job: ${{parameters.name}}
-  timeoutInMinutes: 300
-  strategy:
-    matrix:
-      ${{ insert }}: ${{parameters.customMatrixes}}
-  pool:
-    name: ${{ parameters.pool}}
-  variables:
-    CMAKE_GENERATOR: Ninja
-    PACKAGE_PDBS: 0
-
-  steps:
-  # Prepare for PyTorch build on Windows
-  - template: prepare-build-template.yml
-    parameters:
-      configuration: $(configuration)
-      build_stage: ${{ parameters.build_stage}}
-
-  # Build Stage
-  - ${{ if eq(parameters.build_stage, 'True') }}:
-    # Set up environment variables for specific pipeline build
-    - template: set-environment-variables.yml
-      parameters:
-        os: ${{ parameters.os}}
-        cuda: ${{ parameters.cuda}}
-        is_official_build: ${{ parameters.is_official_build}}
-
-    # Sync and update PyTorch submodules
-    - script: git submodule update --init --recursive --jobs 0
-      displayName: Update PyTorch submodules
-
-    # Build PyTorch and run unit tests - no packaging
-    - ${{ if or(eq(parameters.is_ci_build, 'True'), eq(parameters.is_daily_build, 'True')) }}:
-      # Build PyTorch from source in develop mode with Ninja
-      - script: call activate $(configuration) && python setup.py develop
-        displayName: Build PyTorch from source
-
-      - ${{ if eq(parameters.is_ci_build, 'True') }}:
-        # Run TestTorch unit tests to demonstrate successful PyTorch build
-        - script: call activate $(configuration) && python test\test_torch.py TestTorch
-          displayName: Run TestTorch unit tests
-
-      - ${{ if eq(parameters.is_daily_build, 'True') }}:
-        # Run all unit tests to demonstrate successful PyTorch build
-        - script: call activate $(configuration) && python test/run_test.py --continue-through-error --exclude-jit-executor --verbose
-          displayName: Run all unit tests
-
-      # Run ComponentGovernance
-      - task: ComponentGovernanceComponentDetection@0
-        inputs:
-          scanType: 'Register'
-          verbosity: 'Verbose'
-          alertWarningLevel: 'High'
-
-    # Build PyTorch and produce artifacts for verification stage
-    - ${{ if eq(parameters.is_official_build, 'True') }}:
-      # Build PyTorch from source in install mode with Ninja and exclude test binaries
-      - script: call activate $(configuration) && python setup.py install
-        displayName: Build PyTorch from source without test binaries
-
-      # Package PyTorch Wheel
-      - script: call activate $(configuration) && python setup.py bdist_wheel
-        displayName: Package PyTorch Wheel
-
-      # Publish PyTorch Wheel
-      - task: PublishPipelineArtifact@1
-        inputs:
-          targetPath: $(Build.SourcesDirectory)\dist\
-          artifactName: Build_$(Build.BuildNumber)_$(configuration)
-        displayName: Publish PyTorch Wheel to Pipeline Artifacts
-
-  # Verification Stage
-  - ${{ if eq(parameters.verify_stage, 'True') }}:
-    # Download PyTorch Wheel
-    - task: DownloadPipelineArtifact@2
-      inputs:
-        artifact: Build_$(Build.BuildNumber)_$(configuration)
-        path: $(Build.SourcesDirectory)\verify
-      displayName: Download PyTorch Wheel
-
-    # Install PyTorch Wheel on Windows
-    - script: |
-        call activate $(configuration)
-        cd $(Build.SourcesDirectory)\verify
-        dir torch*win*.whl /b > whl.txt
-        set /p whl= < whl.txt
-        python -m pip install %whl%
-      displayName: Install PyTorch Wheel
-
-    # Ensure PyTorch installed correctly from produced wheel
-    - script: |
-        call activate $(configuration)
-        cd $(Build.SourcesDirectory)\verify
-        python -c "import torch; print('Installed Torch version: ' + torch.__version__)"
-      displayName: Check PyTorch correctly installed from wheel
-
-  # Publishing stage
-  - ${{ if eq(parameters.publish_stage, 'True') }}:
-    # Download PyTorch Wheel
-    - task: DownloadPipelineArtifact@2
-      inputs:
-        artifact: Build_$(Build.BuildNumber)_$(configuration)
-        path: $(Build.SourcesDirectory)\publish
-      displayName: Download PyTorch Wheel
-
-    # Set up Azure Artifacts for Windows
-    # The pip install --upgrade command is a workaround for an Azure CLI bug on Windows
-    # More info: https://github.com/Azure/azure-cli/issues/16858
-    - script: |
-        pip install --upgrade pip --target \opt\az\lib\python3.6\site-packages\
-        az extension add -n azure-devops
-      displayName: Set up Azure Artifacts download on Windows
-
-    # Publish wheel to Azure Artifacts
-    # The flag continueOnError=true is needed as the artifact to be published
-    # may already exist, because the artifact is differentiated based on the
-    # last commit date.
-    - script: |
-        set /p TORCH_VERSION= < version.txt
-        cd $(Build.SourcesDirectory)\publish
-        git rev-parse --short HEAD > last_commit.txt && set /p LAST_COMMIT= < last_commit.txt
-        git log -1 --pretty=%ad --date=format:%Y%m%d > last_commit_date.txt && set /p LAST_COMMIT_DATE= < last_commit_date.txt
-        dir torch*win*.whl /b > whl.txt && set /p TORCH_WHEEL= < whl.txt
-        echo %ADOTOKEN% | az devops login
-        az artifacts universal publish --organization %AZURE_DEVOPS_ARTIFACTS_ORGANIZATION% --project %AZURE_DEVOPS_ARTIFACTS_PROJECT% --scope project --feed "PyTorch" --name %TORCH_WHEEL% --description "PyTorch Official Build Artifact" --version %TORCH_VERSION:~0,5%-%LAST_COMMIT_DATE%-%LAST_COMMIT% --path .
-      env:
-        ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
-      continueOnError: true
-      displayName: Upload PyTorch nigthly package to Azure Artifacts
--- a/.azure_pipelines/job_templates/common-packages.yml
+++ b/.azure_pipelines/job_templates/common-packages.yml
@ -1,17 +0,0 @@
-dependencies:
-  - python=PYTHON_VERSION
-  - numpy
-  - ninja
-  - pyyaml
-  - mkl
-  - mkl-include
-  - setuptools
-  - cmake
-  - cffi
-  - typing_extensions
-  - future
-  - six
-  - requests
-  - dataclasses
-  - pip:
-    - -r ../../requirements.txt
--- a/.azure_pipelines/job_templates/notify-webapp-template.yml
+++ b/.azure_pipelines/job_templates/notify-webapp-template.yml
@ -1,26 +0,0 @@
-parameters:
-  name: ''
-  pool: ''
-  customMatrixes: ''
-
-jobs:
- job: ${{parameters.name}}
-  timeoutInMinutes: 600
-  strategy:
-    matrix:
-      ${{ insert }}: ${{parameters.customMatrixes}}
-  pool:
-    name: ${{ parameters.pool}}
-  steps:
-  # Clone PyTorch Tests repository
-  - bash: |
-      B64_PAT=$(echo -n ":$_ADOTOKEN" | base64)
-      git -c http.extraHeader="Authorization: Basic ${B64_PAT}" clone $(AZURE_DEVOPS_PYTORCH_TESTS_REPO_URL)
-      cd pytorch_tests
-      git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
-    env:
-      _ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
-    displayName: Clone PyTorch Tests repo
-  - bash: |
-      bash $(Build.SourcesDirectory)/pytorch_tests/webapp/notify_webapp.sh
-    displayName: Notify Webapp
--- a/.azure_pipelines/job_templates/prepare-build-template.yml
+++ b/.azure_pipelines/job_templates/prepare-build-template.yml
@ -1,62 +0,0 @@
-# Build prepare steps for PyTorch on Azure DevOps to build from source.
-# These steps are shared between the normal build process and Semmle security scan tasks
-
-parameters:
-  build_stage: False
-  configuration: ''
-
-steps:
-# End Python tasks that may be lingering over from previous runs
-# Note: If python.exe isn't currently running, exit code becomes 128,
-# which fails the run. Here exit code is set to 0 to avoid failed run.
- script: |
-    taskkill /f /im python.exe
-    IF %ERRORLEVEL% EQU 128 exit 0
-  displayName: End previous Python processes
-
-# Clean up env directory in conda for fresh builds and set up conda environment YAML
- powershell: |
-    Remove-Item 'C:\Miniconda\envs' -Recurse -ErrorAction Ignore
-    $env:PYTHON_VERSION = $env:SYSTEM_JOBNAME.Substring(3,1) + '.' + $env:SYSTEM_JOBNAME.Substring(4,1)
-    (Get-Content .azure_pipelines\job_templates\common-packages.yml) -replace 'PYTHON_VERSION', $env:PYTHON_VERSION | Out-File -encoding ASCII .azure_pipelines\job_templates\common-packages.yml
-  displayName: Clean up previous environments and Set up conda environment YAML
-
-# Make conda environment and install required packages
- script: |
-    call conda clean --all -y
-    call conda env create -n $(configuration) --file .azure_pipelines\job_templates\common-packages.yml
-    call activate $(configuration)
-    call conda install -c conda-forge libuv=1.39
-  displayName: Set up conda environment for building from source
-
- ${{ if eq(parameters.build_stage, 'True') }}:
-  # Install MKL
-  - script: |
-      rmdir /s /q mkl
-      del mkl_2020.2.254.7z
-      curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O
-      7z x -aoa mkl_2020.2.254.7z -omkl
-    displayName: Install MKL
-
-  # Install sccache and randomtemp
-  # Related PyTorch GitHub issue: https://github.com/pytorch/pytorch/issues/25393
-  # Related fix: https://github.com/pytorch/builder/pull/448/
-  - script: |
-      mkdir .\tmp_bin
-      curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
-      curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
-      copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
-      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
-    displayName: Install sccache and randomtemp
-    condition: not(eq(variables.CUDA_VERSION, ''))
-
-  # CUDA 11.2's CUB directory conflicts with CUDA 10.2 and 10.1
-  # builds, where CUDA 11.2's CUB is injected into non-CUDA
-  # 11.2 builds.
-  - powershell: Remove-Item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include\cub" -Recurse -ErrorAction Ignore
-    displayName: Remove conflicting CUB from CUDA installation
-    condition: not(eq(variables.CUDA_VERSION, ''))
-
-  - powershell: Copy-Item -Path "F:\cuda_11_2\cub\" -Destination "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -Recurse
-    displayName: Copy CUDA CUB for CUDA 11.2 build
-    condition: eq(variables.CUDA_VERSION, '112')
--- a/.azure_pipelines/job_templates/pytorch-template-unix.yml
+++ b/.azure_pipelines/job_templates/pytorch-template-unix.yml
@ -1,61 +0,0 @@
-# PyTorch build steps template with Unix images Azure DevOps Instances
-#
-# This build depends on 5 parameters set as environment variables in the pipeline:
-#   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
-#   - AZURE_STORAGE_KEY: Secret var for authenticating to Azure Storage
-#   - _TS_CLONE_P, _TS_P, _TS_SM_P: Secret vars for specific unit tests
-
-parameters:
-  name: ''
-  pool: ''
-  container_endpoint: ''
-  customMatrixes: ''
-
-jobs:
- job: ${{parameters.name}}
-  timeoutInMinutes: 600
-  strategy:
-    matrix:
-      ${{ insert }}: ${{parameters.customMatrixes}}
-  pool:
-    name: ${{ parameters.pool}}
-  variables:
-    DECODE_PERCENTS: false
-
-  steps:
-  # Don't checkout repo contents to save time and CPU compute. Environment variables
-  # related to checkout branch such as $(BUILD_SOURCEBRANCH) are still available.
-  - checkout: none
-
-  # Delete pytorch_tests repo from previous builds if exists
-  - bash: rm -rf pytorch_tests/
-    displayName: Delete pytorch_tests repo from previous builds if exists
-
-  # Clone PyTorch Tests repository
-  - bash: |
-      B64_PAT=$(echo -n ":$_ADOTOKEN" | base64)
-      git -c http.extraHeader="Authorization: Basic ${B64_PAT}" clone $(AZURE_DEVOPS_PYTORCH_TESTS_REPO_URL)
-      cd pytorch_tests
-      git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
-    env:
-      _ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
-    displayName: Clone PyTorch Tests repo
-
-  # Run PyTorch Unit Tests
-  - bash: bash $(Build.SourcesDirectory)/pytorch_tests/scripts/linux/run.sh
-    env:
-      _AZURE_STORAGE_KEY: $(AZURE_STORAGE_KEY)
-      _TS_CLONE_P: $(TS_CLONE_PASSWORD)
-      _TS_P: $(TS_PAT)
-      _TS_SM_P: $(TS_SM_PAT)
-      _AZUREML_CLONE_PASSWORD: $(AZUREML_CLONE_PASSWORD)
-      _SPPASSWORD: $(SPPASSWORD)
-    displayName: Run PyTorch Unit Tests
-
-  # Test results are available outside the docker container since
-  # the current directory is mounted as a volume of the container.
-  - task: PublishTestResults@2
-    condition: always()
-    inputs:
-      testResultsFiles: '**/test-*.xml'
-      testRunTitle: 'Publish test results for Python'
--- a/.azure_pipelines/job_templates/pytorch-template-win.yml
+++ b/.azure_pipelines/job_templates/pytorch-template-win.yml
@ -1,57 +0,0 @@
-# PyTorch build steps template with Windows images Azure DevOps Instances
-#
-# This build depends on 5 parameters set as environment variables in the pipeline:
-#   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
-#   - AZURE_STORAGE_KEY: Secret var for authenticating to Azure Storage
-#   - _TS_CLONE_P, _TS_P, _TS_SM_P: Secret vars for specific unit tests
-
-parameters:
-  name: ''
-  pool: ''
-  customMatrixes: ''
-
-jobs:
- job: ${{parameters.name}}
-  timeoutInMinutes: 600
-  strategy:
-    matrix:
-      ${{ insert }}: ${{parameters.customMatrixes}}
-  pool:
-    name: ${{ parameters.pool}}
-
-  steps:
-  # Don't checkout repo contents to save time and CPU compute. Environment variables
-  # related to checkout branch such as $(BUILD_SOURCEBRANCH) are still available.
-  - checkout: none
-
-  # Delete pytorch_tests repo from previous builds if exists
-  - script: if exist "pytorch_tests/" rmdir "pytorch_tests/" /q /s
-    displayName: Delete pytorch_tests repo from previous builds if exists
-
-  # Clone PyTorch Tests repository
-  - powershell: |
-      $env:B64Pat = [Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes(":$env:_ADOTOKEN"))
-      git -c http.extraHeader="Authorization: Basic $env:B64Pat" clone $env:AZURE_DEVOPS_pytorch_tests_REPO_URL
-      cd pytorch_tests
-      git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
-    env:
-      _ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
-    displayName: Clone PyTorch Tests repo
-
-  # Run PyTorch Unit Tests
-  - script: call $(Build.SourcesDirectory)\pytorch_tests\scripts\windows\run.bat
-    env:
-      _ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
-      _AZURE_STORAGE_KEY: $(AZURE_STORAGE_KEY)
-      _TS_CLONE_P: $(TS_CLONE_PASSWORD)
-      _TS_P: $(TS_PAT)
-      _TS_SM_P: $(TS_SM_PAT)
-    displayName: Run PyTorch Unit Tests
-
-  # Test results are available outside the docker container since
-  # the current directory is mounted as a volume of the container.
-  - task: PublishTestResults@2
-    condition: always()
-    inputs:
-      testResultsFiles: '**\test-*.xml'
-      testRunTitle: 'Publish test results for Python'
--- a/.azure_pipelines/job_templates/set-environment-variables.yml
+++ b/.azure_pipelines/job_templates/set-environment-variables.yml
@ -1,131 +0,0 @@
-# Set environment variables for specific configurations
-
-parameters:
-  is_official_build: False
-  os: ''
-  cuda: ''
-
-steps:
-  # Environment configuration steps for Ubuntu builds
-  - ${{ if contains(parameters.os, 'ubuntu') }}:
-    # Set configuration specific build flags
-    - ${{ if eq(parameters.is_official_build, True) }}:
-      - bash: |
-          echo "##vso[task.setvariable variable=INSTALL_TEST;]0"
-          echo "##vso[task.setvariable variable=PYTORCH_BUILD_NUMBER;]1"
-          export PYTORCH_VERSION=$(head -c 5 ./version.txt)
-          echo "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$PYTORCH_VERSION.dev"
-        displayName: Set configuration-specific build flags
-
-      # Set PyTorch CPU/GPU build flags.
-      - ${{ if contains(parameters.cuda, 'cpu') }}:
-        - bash: |
-            echo "##vso[task.setvariable variable=USE_CUDA;]0"
-            echo "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cpu"
-          displayName: Set CUDA-specific build flag for CPU builds
-
-      - ${{ if contains(parameters.cuda, 'gpu') }}:
-        - bash: |
-            echo "##vso[task.setvariable variable=USE_CUDA;]1"
-            echo "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cu$(CUDA_VERSION)"
-          displayName: Set CUDA-specific build flag for GPU builds
-
-    # Set MKL environment variables
-    - bash: |
-        echo "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]/opt/intel/lib:$CMAKE_LIBRARY_PATH"
-        echo "##vso[task.setvariable variable=CMAKE_INCLUDE_PATH;]/opt/intel/include:$CMAKE_INCLUDE_PATH"
-      displayName: Set MKL paths
-
-    # View current environment variables
-    - bash:
-        printenv
-      displayName: Show environment variables
-
-  # Environment configuration steps for Windows builds
-  - ${{ if contains(parameters.os, 'windows') }}:
-    # Set Conda Lib Path
-    - powershell: Write-Host "##vso[task.setvariable variable=CONDA_LIB_PATH;]C:\Miniconda\envs\$(configuration)\Library\bin"
-      displayName: Set Conda Lib Path
-
-    # Set configuration specific build flags
-    - ${{ if eq(parameters.is_official_build, True) }}:
-      - powershell: |
-          Write-Host "##vso[task.setvariable variable=INSTALL_TEST;]0"
-          Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_NUMBER;]1"
-          Set-Variable -Name PYTORCH_VERSION -Value (Get-Content .\version.txt).Substring(0,5)
-          Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$PYTORCH_VERSION.dev"
-        displayName: Set configuration-specific build flags
-
-      # Set PyTorch CPU/GPU build flags.
-      - ${{ if contains(parameters.cuda, 'cpu') }}:
-        - powershell: |
-            Write-Host "##vso[task.setvariable variable=USE_CUDA;]0"
-            Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cpu"
-          displayName: Set CUDA-specific build flag for CPU build
-
-      - ${{ if contains(parameters.cuda, 'gpu') }}:
-        - powershell: |
-            Write-Host "##vso[task.setvariable variable=USE_CUDA;]1"
-            Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cu$(CUDA_VERSION)"
-          displayName: Set CUDA-specific build flag for GPU build
-
-    # Set CUDA 11.2, 10.2 or 10.1 specific build flags
-    - ${{ if eq(parameters.cuda, 'gpu') }}:
-      - powershell: |
-          Write-Host "##vso[task.setvariable variable=TORCH_CUDA_ARCH_LIST;]3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6"
-          Write-Host "##vso[task.setvariable variable=CUDA_PATH;]C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\"
-        displayName: Set CUDA 11.2 specific build flags
-        condition: eq(variables.CUDA_VERSION, '112')
-
-      - powershell: |
-          Write-Host "##vso[task.setvariable variable=TORCH_CUDA_ARCH_LIST;]3.7+PTX;5.0;6.0;6.1;7.0;7.5"
-          Write-Host "##vso[task.setvariable variable=CUDA_PATH;]C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\"
-        displayName: Set CUDA 10.2 specific build flags
-        condition: eq(variables.CUDA_VERSION, '102')
-
-      - powershell: |
-          Write-Host "##vso[task.setvariable variable=TORCH_CUDA_ARCH_LIST;]3.7+PTX;5.0;6.0;6.1;7.0;7.5"
-          Write-Host "##vso[task.setvariable variable=CUDA_PATH;]C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\"
-        displayName: Set CUDA 10.1 specific build flags
-        condition: eq(variables.CUDA_VERSION, '101')
-
-      - powershell: |
-          Write-Host "##vso[task.setvariable variable=CUDA_BIN_PATH;]$env:CUDA_PATH\bin\"
-          Write-Host "##vso[task.setvariable variable=CUDNN_ROOT;]$env:CUDA_PATH"
-          Write-Host "##vso[task.setvariable variable=CUDNN_INCLUDE_DIR;]$env:CUDA_PATH\include\"
-          Write-Host "##vso[task.setvariable variable=CUDNN_LIBRARY;]$env:CUDA_PATH\lib\x64\"
-          Write-Host "##vso[task.prependpath]$env:CUDA_PATH\bin"
-          Write-Host "##vso[task.setvariable variable=TORCH_NVCC_FLAGS;]-Xfatbin -compress-all --no-host-device-move-forward"
-          Write-Host "##vso[task.setvariable variable=THRUST_IGNORE_CUB_VERSION_CHECK;]1"
-          Write-Host "##vso[task.setvariable variable=NVTOOLSEXT_PATH;]C:\Program Files\NVIDIA Corporation\NvToolsExt\"
-        displayName: Set CUDA environment variables
-
-      - powershell: |
-          copy "$(CUDA_BIN_PATH)\cusparse*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CUDA_BIN_PATH)\cublas*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CUDA_BIN_PATH)\cudart*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CUDA_BIN_PATH)\curand*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CUDA_BIN_PATH)\cufft*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CUDA_BIN_PATH)\cusolver*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CUDA_BIN_PATH)\cudnn*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CUDA_BIN_PATH)\nvrtc*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CONDA_LIB_PATH)\libiomp*5md.dll" $(Build.SourcesDirectory)\torch\lib
-          copy "$(CONDA_LIB_PATH)\uv.dll" $(Build.SourcesDirectory)\torch\lib
-        displayName: Copy CUDA/cuDNN/libomp/libuv dlls to torch\lib
-
-    # Set MKL, sccache and randomtemp environment variables
-    - powershell: |
-        Write-Host "##vso[task.setvariable variable=CMAKE_INCLUDE_PATH;]$(Build.SourcesDirectory)\mkl\include"
-        Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
-        Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
-        Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
-        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
-        Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
-        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
-      displayName: Set MKL, sccache and randomtemp environment variables
-
-    # View current environment variables
-    - script:
-        set
-      displayName: Show environment variables
--- a/.azure_pipelines/job_templates/wheel-wait-job-template.yml
+++ b/.azure_pipelines/job_templates/wheel-wait-job-template.yml
@ -1,14 +0,0 @@
-# Main logic to initiate wait for PR artifact to be ready
-
-steps:
- task: InvokeRESTAPI@1
-  displayName: 'Wait for job success and wheel ready'
-  timeoutInMinutes: 60
-  inputs:
-    connectionType: 'connectedServiceName'
-    serviceConnection: circleciconn
-    method: 'POST'
-    headers: '{"Content-Type":"application/json", "BranchName":"$(_TARGET_BRANCH_TO_CHECK)", "JobName":"$(TARGET_CIRCLECI_BUILD_PR)", "PRNumber":"$(_TARGET_PR_NUMBER)", "TargetCommit":"$(_TARGET_COMMIT)", "PlanUrl":"$(System.CollectionUri)", "ProjectId":"$(System.TeamProjectId)", "HubName":"$(System.HostType)", "PlanId":"$(System.PlanId)", "JobId":"$(System.JobId)", "TimelineId":"$(System.TimelineId)", "TaskInstanceId":"$(System.TaskInstanceId)", "AuthToken":"$(System.AccessToken)"}'
-    body: ''
-    urlSuffix: 'api/JobStatus'
-    waitForCompletion: true
--- a/.azure_pipelines/job_templates/wheel-wait-template.yml
+++ b/.azure_pipelines/job_templates/wheel-wait-template.yml
@ -1,92 +0,0 @@
-# Initiate 11 agentless-server waiting jobs to check on the
-# status of PR artifact builds, for a maximum wait time of
-# 11*60 min=660 mins. These jobs will pass immediately
-# once the targeted CircleCI build is ready.
-
-jobs:
- job: checkjob1
-  pool: server
-  timeoutInMinutes: 60
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob2
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob1
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob3
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob2
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob4
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob3
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob5
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob4
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob6
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob5
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob7
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob6
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob8
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob7
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob9
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob8
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob10
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob9
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
-
- job: checkjob11
-  pool: server
-  timeoutInMinutes: 60
-  dependsOn: checkjob10
-  continueOnError: true
-  steps:
-  - template: wheel-wait-job-template.yml
--- a/.azure_pipelines/nightly-pytorch-tests-pipeline.yml
+++ b/.azure_pipelines/nightly-pytorch-tests-pipeline.yml
@ -1,60 +0,0 @@
-# PyTorch Nightly PyTorch Tests Builds Pipeline on Azure DevOps
-#
-# This pipeline runs custom PyTorch unit-tests on nightly
-# PyTorch wheels.
-
-stages:
- stage: 'NightlyCustomTests'
-  displayName: 'Run custom unit tests on PyTorch wheels'
-  jobs:
-  - template: job_templates/pytorch-template-unix.yml
-    parameters:
-      name: ubuntu_1804_CPU_docker
-      pool: $(BUILD_POOL_LIN_1)
-      customMatrixes:
-        Nightly_Custom_Tests:
-          _DOCKER_IMAGE: $(DOCKER_IMAGE_LIN_1)
-          _PYTHON_VERSION: $(PYTHON_VERSION_LIN_1)
-          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_LIN_1)
-          _RUN_TESTS: $(RUN_TESTS_LIN)
-
-  - template: job_templates/pytorch-template-unix.yml
-    parameters:
-      name: ubuntu_1804_GPU_docker
-      pool: $(BUILD_POOL_LIN_2)
-      customMatrixes:
-        Nightly_Custom_Tests:
-          _DOCKER_IMAGE: $(DOCKER_IMAGE_LIN_2)
-          _PYTHON_VERSION: $(PYTHON_VERSION_LIN_2)
-          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_LIN_2)
-          _RUN_TESTS: $(RUN_TESTS_LIN)
-
-  - template: job_templates/pytorch-template-win.yml
-    parameters:
-      name: windows_2019_CPU
-      pool: $(BUILD_POOL_WIN_1)
-      customMatrixes:
-        Nightly_Custom_Tests:
-          _PYTHON_VERSION: $(PYTHON_VERSION_WIN_1)
-          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_WIN_1)
-          _RUN_TESTS: $(RUN_TESTS_WIN)
-
-  - template: job_templates/pytorch-template-win.yml
-    parameters:
-      name: windows_2019_GPU
-      pool: $(BUILD_POOL_WIN_2)
-      customMatrixes:
-        Nightly_Custom_Tests:
-          _PYTHON_VERSION: $(PYTHON_VERSION_WIN_2)
-          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_WIN_2)
-          _RUN_TESTS: $(RUN_TESTS_WIN)
-
- stage: 'NotifyWebapp'
-  displayName: 'Notify Webapp that pipeline is finished'
-  dependsOn: NightlyCustomTests
-  condition: succeededOrFailed()
-  jobs:
-  - template: job_templates/notify-webapp-template.yml
-    parameters:
-      name: ubuntu_1804_CPU
-      pool: $(BUILD_POOL_LIN_1)
--- a/.azure_pipelines/pytorch-tests-pipeline.yml
+++ b/.azure_pipelines/pytorch-tests-pipeline.yml
@ -1,62 +0,0 @@
-# PyTorch PR PyTorch Tests Builds Pipeline on Azure DevOps
-#
-# This pipeline:
-#   1) ensures that CircleCI builds for a given PR
-#      have finished, and that its artifacts are
-#      ready for download
-#   2) runs custom PyTorch unit-tests on PyTorch
-#      wheels generated during PR builds.
-
-resources:
-  webhooks:
-    - webhook: GitHubPyTorchPRTrigger
-      connection: GitHubPyTorchPRTriggerConnection
-      filters:
-        - path: repositoryName
-          value: pytorch_tests
-
-stages:
- stage: 'EnsureArtifactsReady'
-  displayName: 'Ensure PyTorch PR Artifacts are ready'
-  jobs:
-  - template: job_templates/wheel-wait-template.yml
-  variables:
-    _TARGET_BRANCH_TO_CHECK: ${{parameters.GitHubPyTorchPRTrigger.TARGET_BRANCH_TO_CHECK_AZ_DEVOPS_PR}}
-    _TARGET_PR_NUMBER: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
-    _TARGET_COMMIT: ${{parameters.GitHubPyTorchPRTrigger.TARGET_COMMIT}}
-
- stage: 'PRCustomTests'
-  displayName: 'Run custom unit tests on PyTorch wheels'
-  dependsOn: EnsureArtifactsReady
-  condition: succeeded()
-  jobs:
-  - template: job_templates/pytorch-template-unix.yml
-    parameters:
-      name: ubuntu_1804_GPU_docker
-      pool: $(BUILD_POOL_PR)
-      customMatrixes:
-        PR_Custom_Tests:
-          _PYTHON_VERSION: $(PYTHON_VERSION_PR)
-          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_PR)
-          _TARGET_CIRCLECI_BUILD: $(TARGET_CIRCLECI_BUILD_PR)
-          _TARGET_BRANCH_TO_CHECK: ${{parameters.GitHubPyTorchPRTrigger.TARGET_BRANCH_TO_CHECK_AZ_DEVOPS_PR}}
-          _TARGET_PR_NUMBER: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
-          _TARGET_COMMIT: ${{parameters.GitHubPyTorchPRTrigger.TARGET_COMMIT}}
-          _DOCKER_IMAGE: $(DOCKER_IMAGE_PR)
-          _RUN_TESTS: $(RUN_TESTS_PR)
-
- stage: 'NotifyWebapp'
-  displayName: 'Notify Webapp that pipeline is finished'
-  dependsOn: PRCustomTests
-  condition: succeededOrFailed()
-  jobs:
-  - template: job_templates/notify-webapp-template.yml
-    parameters:
-      name: ubuntu_1804_CPU
-      pool: $(BUILD_POOL_LIN_1)
-      customMatrixes:
-        PR_Notify_WebApp:
-          _TARGET_CIRCLECI_BUILD: $(TARGET_CIRCLECI_BUILD_PR)
-          _TARGET_BRANCH_TO_CHECK: ${{parameters.GitHubPyTorchPRTrigger.TARGET_BRANCH_TO_CHECK_AZ_DEVOPS_PR}}
-          _TARGET_PR_NUMBER: ${{parameters.GitHubPyTorchPRTrigger.PR_NUMBER}}
-          _TARGET_COMMIT: ${{parameters.GitHubPyTorchPRTrigger.TARGET_COMMIT}}
--- a/.azure_pipelines/verify-pipeline.yml
+++ b/.azure_pipelines/verify-pipeline.yml
@ -1,224 +0,0 @@
-# PyTorch Official Builds Pipeline on Azure DevOps
-#
-# This pipeline:
-#   1) builds PyTorch on all available configurations
-#   2) verifies PyTorch artifacts by installing them in a clean environment
-#      and checking torch.__version__
-#   3) publishes official PyTorch artifacts to Azure DevOps Artifacts for consumption
-
-stages:
- stage: 'Build'
-  displayName: 'Build PyTorch'
-  jobs:
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_CPU_docker
-      pool: 'PyTorch-Linux-CPU'
-      container_endpoint: pytorchms.azurecr.io
-      build_stage: True
-      is_official_build: True
-      os: ubuntu
-      cuda: cpu
-      customMatrixes:
-        Py_38:
-          configuration: ubuntu_1804_py_38_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
-        Py_37:
-          configuration: ubuntu_1804_py_37_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
-
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_GPU_docker
-      pool: 'PyTorch-Linux-GPU'
-      container_endpoint: pytorchms.azurecr.io
-      build_stage: True
-      is_official_build: True
-      os: ubuntu
-      cuda: gpu
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_810:
-          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_765:
-          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
-          CUDA_VERSION: 101
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_CPU
-      pool: 'PyTorch-Win-CPU'
-      build_stage: True
-      is_official_build: True
-      os: windows
-      cuda: cpu
-      customMatrixes:
-        Py_38:
-          configuration: windows_2019_py_38_cpu
-        Py_37:
-          configuration: windows_2019_py_37_cpu
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_GPU
-      pool: 'PyTorch-Win-GPU'
-      build_stage: True
-      is_official_build: True
-      os: windows
-      cuda: gpu
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: windows_2019_py_39_cuda_112_cudnn_810
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_765:
-          configuration: windows_2019_py_38_cuda_102_cudnn_765
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_764:
-          configuration: windows_2019_py_37_cuda_101_cudnn_764
-          CUDA_VERSION: 101
-
- stage: 'Verify'
-  displayName: 'Verify PyTorch wheels'
-  dependsOn: Build
-  condition: succeeded()
-  jobs:
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_CPU_docker
-      pool: 'PyTorch-Linux-CPU'
-      container_endpoint: pytorchms.azurecr.io
-      verify_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_38:
-          configuration: ubuntu_1804_py_38_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
-        Py_37:
-          configuration: ubuntu_1804_py_37_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
-
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_GPU_docker
-      pool: 'PyTorch-Linux-GPU'
-      container_endpoint: pytorchms.azurecr.io
-      verify_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_810:
-          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_765:
-          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
-          CUDA_VERSION: 101
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_CPU
-      pool: 'PyTorch-Win-CPU'
-      verify_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_38:
-          configuration: windows_2019_py_38_cpu
-        Py_37:
-          configuration: windows_2019_py_37_cpu
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_GPU
-      pool: 'PyTorch-Win-GPU'
-      verify_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: windows_2019_py_39_cuda_112_cudnn_810
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_765:
-          configuration: windows_2019_py_38_cuda_102_cudnn_765
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_764:
-          configuration: windows_2019_py_37_cuda_101_cudnn_764
-          CUDA_VERSION: 101
-
- stage: 'Publish'
-  displayName: 'Publish PyTorch wheels'
-  dependsOn: Verify
-  condition: succeeded()
-  jobs:
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_CPU_docker
-      pool: 'PyTorch-Linux-CPU'
-      container_endpoint: pytorchms.azurecr.io
-      publish_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_38:
-          configuration: ubuntu_1804_py_38_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
-        Py_37:
-          configuration: ubuntu_1804_py_37_cpu
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
-
-  - template: job_templates/build-verify-publish-template-unix.yml
-    parameters:
-      name: ubuntu_1804_GPU_docker
-      pool: 'PyTorch-Linux-GPU'
-      container_endpoint: pytorchms.azurecr.io
-      publish_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_810:
-          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_765:
-          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
-          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
-          CUDA_VERSION: 101
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_CPU
-      pool: 'PyTorch-Win-CPU'
-      publish_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_38:
-          configuration: windows_2019_py_38_cpu
-        Py_37:
-          configuration: windows_2019_py_37_cpu
-
-  - template: job_templates/build-verify-publish-template-win.yml
-    parameters:
-      name: windows_2019_GPU
-      pool: 'PyTorch-Win-GPU'
-      publish_stage: True
-      is_official_build: True
-      customMatrixes:
-        Py_39_CUDA_112_cuDNN_810:
-          configuration: windows_2019_py_39_cuda_112_cudnn_810
-          CUDA_VERSION: 112
-        Py_38_CUDA_102_cuDNN_765:
-          configuration: windows_2019_py_38_cuda_102_cudnn_765
-          CUDA_VERSION: 102
-        Py_37_CUDA_101_cuDNN_764:
-          configuration: windows_2019_py_37_cuda_101_cudnn_764
-          CUDA_VERSION: 101
--- a/.bazelrc
+++ b/.bazelrc
@ -1,13 +1,3 @@
 build --copt=--std=c++14
 build --copt=-I.
 build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
-
-# Configuration to disable tty features for environments like CI
-build:no-tty --curses no
-build:no-tty --progress_report_interval 10
-build:no-tty --show_progress_rate_limit 10
-
-# Configuration to build with GPU support
-build:gpu --define=cuda=true
-# define a separate build folder for faster switching between configs
-build:gpu --platform_suffix=-gpu
--- a/.bazelversion
+++ b/.bazelversion
@ -1 +1 @@
-4.2.1
+3.1.0
--- a/.circleci/README.md
+++ b/.circleci/README.md
@ -343,6 +343,7 @@ All linux builds occur in docker images. The docker images are
    * Has ALL CUDA versions installed. The script pytorch/builder/conda/switch_cuda_version.sh sets /usr/local/cuda to a symlink to e.g. /usr/local/cuda-10.0 to enable different CUDA builds
    * Also used for cpu builds
 * pytorch/manylinux-cuda90
+* pytorch/manylinux-cuda92
 * pytorch/manylinux-cuda100
    * Also used for cpu builds

--- a/.circleci/cimodel/data/binary_build_data.py
+++ b/.circleci/cimodel/data/binary_build_data.py
@ -55,15 +55,14 @@ CONFIG_TREE_DATA = OrderedDict(
    macos_arm64=([None], OrderedDict(
        wheel=[
            "3.8",
-            "3.9",
        ],
        conda=[
            "3.8",
-            "3.9",
        ],
    )),
+    # Skip CUDA-9.2 builds on Windows
    windows=(
-        [v for v in dimensions.GPU_VERSIONS if v not in dimensions.ROCM_VERSION_LABELS],
+        [v for v in dimensions.GPU_VERSIONS if v not in ['cuda92'] + dimensions.ROCM_VERSION_LABELS],
        OrderedDict(
            wheel=dimensions.STANDARD_PYTHON_VERSIONS,
            conda=dimensions.STANDARD_PYTHON_VERSIONS,
@ -126,7 +125,6 @@ class PackageFormatConfigNode(ConfigNode):
        self.props["python_versions"] = python_versions
        self.props["package_format"] = package_format

-
    def get_children(self):
        if self.find_prop("os_name") == "linux":
            return [LinuxGccConfigNode(self, v) for v in LINUX_GCC_CONFIG_VARIANTS[self.find_prop("package_format")]]
--- a/.circleci/cimodel/data/binary_build_definitions.py
+++ b/.circleci/cimodel/data/binary_build_definitions.py
@ -27,19 +27,7 @@ class Conf(object):

    def gen_docker_image(self):
        if self.gcc_config_variant == 'gcc5.4_cxx11-abi':
-            if self.gpu_version is None:
-                return miniutils.quote("pytorch/libtorch-cxx11-builder:cpu")
-            else:
-                return miniutils.quote(
-                    f"pytorch/libtorch-cxx11-builder:{self.gpu_version}"
-                )
-        if self.pydistro == "conda":
-            if self.gpu_version is None:
-                return miniutils.quote("pytorch/conda-builder:cpu")
-            else:
-                return miniutils.quote(
-                    f"pytorch/conda-builder:{self.gpu_version}"
-                )
+            return miniutils.quote("pytorch/pytorch-binary-docker-image-ubuntu16.04:latest")

        docker_word_substitution = {
            "manywheel": "manylinux",
@ -124,9 +112,9 @@ class Conf(object):
        Output looks similar to:

      - binary_upload:
-          name: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_upload
+          name: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_upload
          context: org-member
-          requires: binary_linux_manywheel_3_7m_cu113_devtoolset7_nightly_test
+          requires: binary_linux_manywheel_3_7m_cu92_devtoolset7_nightly_test
          filters:
            branches:
              only:
@ -134,7 +122,7 @@ class Conf(object):
            tags:
              only: /v[0-9]+(\\.[0-9]+)*-rc[0-9]+/
          package_type: manywheel
-          upload_subfolder: cu113
+          upload_subfolder: cu92
        """
        return {
            "binary_upload": OrderedDict({
--- a/.circleci/cimodel/data/dimensions.py
+++ b/.circleci/cimodel/data/dimensions.py
@ -1,15 +1,14 @@
 PHASES = ["build", "test"]

 CUDA_VERSIONS = [
+    "101",
    "102",
    "111",
-    "113",
 ]

 ROCM_VERSIONS = [
+    "3.10",
    "4.0.1",
-    "4.1",
-    "4.2",
 ]

 ROCM_VERSION_LABELS = ["rocm" + v for v in ROCM_VERSIONS]
--- a/.circleci/cimodel/data/pytorch_build_data.py
+++ b/.circleci/cimodel/data/pytorch_build_data.py
@ -7,48 +7,73 @@ CONFIG_TREE_DATA = [
            ("5.4", [  # All this subtree rebases to master and then build
                ("3.6", [
                    ("important", [X(True)]),
+                    ("parallel_tbb", [X(True)]),
+                    ("parallel_native", [X(True)]),
+                    ("pure_torch", [X(True)]),
                ]),
            ]),
            # TODO: bring back libtorch test
            ("7", [X("3.6")]),
        ]),
        ("clang", [
-            ("7", [
+            ("5", [
                ("3.6", [
                    ("asan", [
                        (True, [
                            ("shard_test", [XImportant(True)]),
                        ]),
                    ]),
+                ]),
+            ]),
+            ("7", [
+                ("3.6", [
                    ("onnx", [XImportant(True)]),
                ]),
            ]),
        ]),
        ("cuda", [
-            ("10.2", [
+            ("9.2", [
                ("3.6", [
-                    # Builds are needed for slow_gradcheck
-                    ('build_only', [X(True)]),
-                    ("slow_gradcheck", [
-                        # If you update this slow gradcheck, you should
-                        # also update docker_definitions.py to make sure
-                        # the docker image matches the config used here
-                        (True, [
-                            ('shard_test', [XImportant(True)]),
+                    X(True),
+                    ("cuda_gcc_override", [
+                        ("gcc5.4", [
+                            ('build_only', [XImportant(True)]),
+                        ]),
+                    ]),
+                ])
+            ]),
+            ("10.1", [
+                ("3.6", [
+                    ('build_only', [X(True)]),
+                ]),
+            ]),
+            ("10.2", [
+                ("3.6", [
+                    ("shard_test", [XImportant(True)]),
+                    ("libtorch", [
+                        (True, [
+                            ('build_only', [X(True)]),
+                        ]),
+                    ]),
+                ]),
+            ]),
+            ("11.1", [
+                ("3.8", [
+                    X(True),
+                    ("libtorch", [
+                        (True, [
+                            ('build_only', [XImportant(True)]),
                        ]),
                    ]),
-                    # UNCOMMENT THE BELOW TO REENABLE LIBTORCH
-                    # ("libtorch", [
-                    #     (True, [
-                    #         ('build_only', [X(True)]),
-                    #     ]),
-                    # ]),
                ]),
            ]),
        ]),
    ]),
    ("bionic", [
        ("clang", [
+            ("9", [
+                XImportant("3.6"),
+            ]),
            ("9", [
                ("3.6", [
                    ("xla", [XImportant(True)]),
@ -56,14 +81,24 @@ CONFIG_TREE_DATA = [
                ]),
            ]),
        ]),
-        # @jithunnair-amd believes Jenkins builds are sufficient
-        # ("rocm", [
-        #     ("3.9", [
-        #         ("3.6", [
-        #             ('build_only', [XImportant(True)]),
-        #         ]),
-        #     ]),
-        # ]),
+        ("gcc", [
+            ("9", [
+                ("3.8", [
+                    ("coverage", [
+                        (True, [
+                            ("shard_test", [XImportant(True)]),
+                        ]),
+                    ]),
+                ]),
+            ]),
+        ]),
+        ("rocm", [
+            ("3.9", [
+                ("3.6", [
+                    ('build_only', [XImportant(True)]),
+                ]),
+            ]),
+        ]),
    ]),
 ]

@ -116,8 +151,6 @@ class PyVerConfigNode(TreeConfigNode):
    def init2(self, node_name):
        self.props["pyver"] = node_name
        self.props["abbreviated_pyver"] = get_major_pyver(node_name)
-        if node_name == "3.9":
-            self.props["abbreviated_pyver"] = "py3.9"

    # noinspection PyMethodMayBeStatic
    def child_constructor(self):
@ -134,10 +167,8 @@ class ExperimentalFeatureConfigNode(TreeConfigNode):
        next_nodes = {
            "asan": AsanConfigNode,
            "xla": XlaConfigNode,
-            "mlc": MLCConfigNode,
            "vulkan": VulkanConfigNode,
            "parallel_tbb": ParallelTBBConfigNode,
-            "noarch": NoarchConfigNode,
            "parallel_native": ParallelNativeConfigNode,
            "onnx": ONNXConfigNode,
            "libtorch": LibTorchConfigNode,
@ -147,18 +178,10 @@ class ExperimentalFeatureConfigNode(TreeConfigNode):
            "cuda_gcc_override": CudaGccOverrideConfigNode,
            "coverage": CoverageConfigNode,
            "pure_torch": PureTorchConfigNode,
-            "slow_gradcheck": SlowGradcheckConfigNode,
        }
        return next_nodes[experimental_feature]


-class SlowGradcheckConfigNode(TreeConfigNode):
-    def init2(self, node_name):
-        self.props["is_slow_gradcheck"] = True
-
-    def child_constructor(self):
-        return ExperimentalFeatureConfigNode
-
 class PureTorchConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "PURE_TORCH=" + str(label)
@ -180,16 +203,6 @@ class XlaConfigNode(TreeConfigNode):
    def child_constructor(self):
        return ImportantConfigNode

-class MLCConfigNode(TreeConfigNode):
-    def modify_label(self, label):
-        return "MLC=" + str(label)
-
-    def init2(self, node_name):
-        self.props["is_mlc"] = node_name
-
-    def child_constructor(self):
-        return ImportantConfigNode
-

 class AsanConfigNode(TreeConfigNode):
    def modify_label(self, label):
@ -235,14 +248,6 @@ class ParallelTBBConfigNode(TreeConfigNode):
        return ImportantConfigNode


-class NoarchConfigNode(TreeConfigNode):
-    def init2(self, node_name):
-        self.props["is_noarch"] = node_name
-
-    def child_constructor(self):
-        return ImportantConfigNode
-
-
 class ParallelNativeConfigNode(TreeConfigNode):
    def modify_label(self, label):
        return "PARALLELNATIVE=" + str(label)
--- a/.circleci/cimodel/data/pytorch_build_definitions.py
+++ b/.circleci/cimodel/data/pytorch_build_definitions.py
@ -31,7 +31,6 @@ class Conf:
    is_libtorch: bool = False
    is_important: bool = False
    parallel_backend: Optional[str] = None
-    build_only: bool = False

    @staticmethod
    def is_test_phase(phase):
@ -113,8 +112,6 @@ class Conf:
            parameters["resource_class"] = "xlarge"
        if hasattr(self, 'filters'):
            parameters['filters'] = self.filters
-        if self.build_only:
-            parameters['build_only'] = miniutils.quote(str(int(True)))
        return parameters

    def gen_workflow_job(self, phase):
@ -178,6 +175,35 @@ class DocPushConf(object):
            }
        }

+# TODO Convert these to graph nodes
+def gen_dependent_configs(xenial_parent_config):
+
+    extra_parms = [
+        (["multigpu"], "large"),
+        (["nogpu", "NO_AVX2"], None),
+        (["nogpu", "NO_AVX"], None),
+        (["slow"], "medium"),
+    ]
+
+    configs = []
+    for parms, gpu in extra_parms:
+
+        c = Conf(
+            xenial_parent_config.distro,
+            ["py3"] + parms,
+            pyver=xenial_parent_config.pyver,
+            cuda_version=xenial_parent_config.cuda_version,
+            restrict_phases=["test"],
+            gpu_resource=gpu,
+            parent_build=xenial_parent_config,
+            is_important=False,
+        )
+
+        configs.append(c)
+
+    return configs
+
+
 def gen_docs_configs(xenial_parent_config):
    configs = []

@ -185,7 +211,7 @@ def gen_docs_configs(xenial_parent_config):
        HiddenConf(
            "pytorch_python_doc_build",
            parent_build=xenial_parent_config,
-            filters=gen_filter_dict(branches_list=["master", "nightly"],
+            filters=gen_filter_dict(branches_list=r"/.*/",
                                    tags_list=RC_PATTERN),
        )
    )
@ -201,7 +227,7 @@ def gen_docs_configs(xenial_parent_config):
        HiddenConf(
            "pytorch_cpp_doc_build",
            parent_build=xenial_parent_config,
-            filters=gen_filter_dict(branches_list=["master", "nightly"],
+            filters=gen_filter_dict(branches_list=r"/.*/",
                                    tags_list=RC_PATTERN),
        )
    )
@ -212,6 +238,13 @@ def gen_docs_configs(xenial_parent_config):
            branch="master",
        )
    )
+
+    configs.append(
+        HiddenConf(
+            "pytorch_doc_test",
+            parent_build=xenial_parent_config
+        )
+    )
    return configs


@ -225,7 +258,7 @@ def gen_tree():
    return configs_list


-def instantiate_configs(only_slow_gradcheck):
+def instantiate_configs():

    config_list = []

@ -240,16 +273,11 @@ def instantiate_configs(only_slow_gradcheck):
        is_xla = fc.find_prop("is_xla") or False
        is_asan = fc.find_prop("is_asan") or False
        is_coverage = fc.find_prop("is_coverage") or False
-        is_noarch = fc.find_prop("is_noarch") or False
        is_onnx = fc.find_prop("is_onnx") or False
        is_pure_torch = fc.find_prop("is_pure_torch") or False
        is_vulkan = fc.find_prop("is_vulkan") or False
-        is_slow_gradcheck = fc.find_prop("is_slow_gradcheck") or False
        parms_list_ignored_for_docker_image = []

-        if only_slow_gradcheck ^ is_slow_gradcheck:
-            continue
-
        python_version = None
        if compiler_name == "cuda" or compiler_name == "android":
            python_version = fc.find_prop("pyver")
@ -288,9 +316,6 @@ def instantiate_configs(only_slow_gradcheck):
            parms_list_ignored_for_docker_image.append("coverage")
            python_version = fc.find_prop("pyver")

-        if is_noarch:
-            parms_list_ignored_for_docker_image.append("noarch")
-
        if is_onnx:
            parms_list.append("onnx")
            python_version = fc.find_prop("pyver")
@ -313,10 +338,6 @@ def instantiate_configs(only_slow_gradcheck):
        if build_only or is_pure_torch:
            restrict_phases = ["build"]

-        if is_slow_gradcheck:
-            parms_list_ignored_for_docker_image.append("old")
-            parms_list_ignored_for_docker_image.append("gradcheck")
-
        gpu_resource = None
        if cuda_version and cuda_version != "10":
            gpu_resource = "medium"
@ -336,7 +357,6 @@ def instantiate_configs(only_slow_gradcheck):
            is_libtorch=is_libtorch,
            is_important=is_important,
            parallel_backend=parallel_backend,
-            build_only=build_only,
        )

        # run docs builds on "pytorch-linux-xenial-py3.6-gcc5.4". Docs builds
@ -357,19 +377,19 @@ def instantiate_configs(only_slow_gradcheck):
                                        tags_list=RC_PATTERN)
            c.dependent_tests = gen_docs_configs(c)

+        if cuda_version == "10.2" and python_version == "3.6" and not is_libtorch:
+            c.dependent_tests = gen_dependent_configs(c)
+
        if (
-            compiler_name != "clang"
-            and not rocm_version
+            compiler_name == "gcc"
+            and compiler_version == "5.4"
            and not is_libtorch
            and not is_vulkan
            and not is_pure_torch
-            and not is_noarch
-            and not is_slow_gradcheck
-            and not only_slow_gradcheck
-            and not build_only
+            and parallel_backend is None
        ):
-            distributed_test = Conf(
-                c.gen_build_name("") + "distributed",
+            bc_breaking_check = Conf(
+                "backward-compatibility-check",
                [],
                is_xla=False,
                restrict_phases=["test"],
@ -377,16 +397,16 @@ def instantiate_configs(only_slow_gradcheck):
                is_important=True,
                parent_build=c,
            )
-            c.dependent_tests.append(distributed_test)
+            c.dependent_tests.append(bc_breaking_check)

        config_list.append(c)

    return config_list


-def get_workflow_jobs(only_slow_gradcheck=False):
+def get_workflow_jobs():

-    config_list = instantiate_configs(only_slow_gradcheck)
+    config_list = instantiate_configs()

    x = []
    for conf_options in config_list:
--- a/.circleci/cimodel/data/simple/android_definitions.py
+++ b/.circleci/cimodel/data/simple/android_definitions.py
@ -2,7 +2,6 @@ import cimodel.data.simple.util.branch_filters as branch_filters
 from cimodel.data.simple.util.docker_constants import (
    DOCKER_IMAGE_NDK, DOCKER_REQUIREMENT_NDK
 )
-import cimodel.lib.miniutils as miniutils


 class AndroidJob:
@ -52,15 +51,13 @@ class AndroidGradleJob:
                 template_name,
                 dependencies,
                 is_master_only=True,
-                 is_pr_only=False,
-                 extra_props=tuple()):
+                 is_pr_only=False):

        self.job_name = job_name
        self.template_name = template_name
        self.dependencies = dependencies
        self.is_master_only = is_master_only
        self.is_pr_only = is_pr_only
-        self.extra_props = dict(extra_props)

    def gen_tree(self):

@ -73,8 +70,6 @@ class AndroidGradleJob:
            props_dict["filters"] = branch_filters.gen_filter_dict(branch_filters.NON_PR_BRANCH_LIST)
        elif self.is_pr_only:
            props_dict["filters"] = branch_filters.gen_filter_dict(branch_filters.PR_BRANCH_LIST)
-        if self.extra_props:
-            props_dict.update(self.extra_props)

        return [{self.template_name: props_dict}]

@ -96,15 +91,6 @@ WORKFLOW_DATA = [
        [DOCKER_REQUIREMENT_NDK],
        is_master_only=False,
        is_pr_only=True),
-    AndroidGradleJob(
-        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single-full-jit",
-        "pytorch_android_gradle_custom_build_single",
-        [DOCKER_REQUIREMENT_NDK],
-        is_master_only=False,
-        is_pr_only=True,
-        extra_props=tuple({
-            "lite_interpreter": miniutils.quote(str(int(False)))
-        }.items())),
    AndroidGradleJob(
        "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build",
        "pytorch_android_gradle_build",
--- a/.circleci/cimodel/data/simple/binary_smoketest.py
+++ b/.circleci/cimodel/data/simple/binary_smoketest.py
@ -77,7 +77,7 @@ WORKFLOW_DATA = [
        ["libtorch", "3.7m", "cpu", "devtoolset7"],
        "pytorch/manylinux-cuda102",
        "binary_linux_libtorch_3_7m_cpu_devtoolset7_shared-with-deps_build",
-        is_master_only=True,
+        is_master_only=False,
        has_libtorch_variant=True,
    ),
    SmoketestJob(
@ -109,14 +109,14 @@ WORKFLOW_DATA = [
        ["libtorch", "3.7", "cpu", "debug"],
        None,
        "binary_windows_libtorch_3_7_cpu_debug_build",
-        is_master_only=True,
+        is_master_only=False,
    ),
    SmoketestJob(
        "binary_windows_build",
        ["libtorch", "3.7", "cpu", "release"],
        None,
        "binary_windows_libtorch_3_7_cpu_release_build",
-        is_master_only=True,
+        is_master_only=False,
    ),
    SmoketestJob(
        "binary_windows_build",
@ -131,7 +131,7 @@ WORKFLOW_DATA = [
        ["libtorch", "3.7", "cpu", "debug"],
        None,
        "binary_windows_libtorch_3_7_cpu_debug_test",
-        is_master_only=True,
+        is_master_only=False,
        requires=["binary_windows_libtorch_3_7_cpu_debug_build"],
    ),
    SmoketestJob(
@ -173,7 +173,7 @@ WORKFLOW_DATA = [
        ["libtorch", "3.7m", "cpu", "devtoolset7"],
        "pytorch/manylinux-cuda102",
        "binary_linux_libtorch_3_7m_cpu_devtoolset7_shared-with-deps_test",
-        is_master_only=True,
+        is_master_only=False,
        requires=["binary_linux_libtorch_3_7m_cpu_devtoolset7_shared-with-deps_build"],
        has_libtorch_variant=True,
    ),
@ -182,7 +182,7 @@ WORKFLOW_DATA = [
        ["libtorch", "3.7m", "cpu", "gcc5.4_cxx11-abi"],
        "pytorch/pytorch-binary-docker-image-ubuntu16.04:latest",
        "binary_linux_libtorch_3_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_test",
-        is_master_only=True,
+        is_master_only=False,
        requires=["binary_linux_libtorch_3_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_build"],
        has_libtorch_variant=True,
    ),
--- a/.circleci/cimodel/data/simple/docker_definitions.py
+++ b/.circleci/cimodel/data/simple/docker_definitions.py
@ -6,39 +6,38 @@ from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN

 # TODO: make this generated from a matrix rather than just a static list
 IMAGE_NAMES = [
-    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7",
+    "pytorch-linux-bionic-cuda11.1-cudnn8-py3.6-gcc9",
+    "pytorch-linux-bionic-cuda11.1-cudnn8-py3.8-gcc9",
+    "pytorch-linux-bionic-cuda11.0-cudnn8-py3.6-gcc9",
+    "pytorch-linux-bionic-cuda11.0-cudnn8-py3.8-gcc9",
+    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.8-gcc9",
    "pytorch-linux-bionic-py3.6-clang9",
    "pytorch-linux-bionic-cuda10.2-cudnn7-py3.6-clang9",
    "pytorch-linux-bionic-py3.8-gcc9",
+    "pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7",
+    "pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7",
    "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7",
+    "pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7",
    "pytorch-linux-xenial-cuda11.1-cudnn8-py3-gcc7",
-    "pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7",
+    "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc5.4",
+    "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7",
    "pytorch-linux-xenial-py3-clang5-android-ndk-r19c",
    "pytorch-linux-xenial-py3-clang5-asan",
-    "pytorch-linux-xenial-py3-clang7-asan",
    "pytorch-linux-xenial-py3-clang7-onnx",
    "pytorch-linux-xenial-py3.8",
    "pytorch-linux-xenial-py3.6-clang7",
    "pytorch-linux-xenial-py3.6-gcc5.4",  # this one is used in doc builds
    "pytorch-linux-xenial-py3.6-gcc7.2",
    "pytorch-linux-xenial-py3.6-gcc7",
-    "pytorch-linux-bionic-rocm4.1-py3.6",
-    "pytorch-linux-bionic-rocm4.2-py3.6",
-    "pytorch-linux-bionic-rocm4.3.1-py3.6",
+    "pytorch-linux-bionic-rocm3.9-py3.6",
+    "pytorch-linux-bionic-rocm3.10-py3.6",
 ]

-# This entry should be an element from the list above
-# This should contain the image matching the "slow_gradcheck" entry in
-# pytorch_build_data.py
-SLOW_GRADCHECK_IMAGE_NAME = "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"

-def get_workflow_jobs(only_slow_gradcheck=False):
+def get_workflow_jobs():
    """Generates a list of docker image build definitions"""
    ret = []
    for image_name in IMAGE_NAMES:
-        if only_slow_gradcheck and image_name is not SLOW_GRADCHECK_IMAGE_NAME:
-            continue
-
        parameters = OrderedDict({
            "name": quote(f"docker-{image_name}"),
            "image_name": quote(image_name),
--- a/.circleci/cimodel/data/simple/ge_config_tests.py
+++ b/.circleci/cimodel/data/simple/ge_config_tests.py
@ -0,0 +1,78 @@
+import cimodel.lib.miniutils as miniutils
+from cimodel.data.simple.util.versions import MultiPartVersion, CudaVersion
+from cimodel.data.simple.util.docker_constants import DOCKER_IMAGE_BASIC, DOCKER_IMAGE_CUDA_10_2
+
+
+class GeConfigTestJob:
+    def __init__(self,
+                 py_version,
+                 gcc_version,
+                 cuda_version,
+                 variant_parts,
+                 extra_requires,
+                 use_cuda_docker=False,
+                 build_env_override=None):
+
+        self.py_version = py_version
+        self.gcc_version = gcc_version
+        self.cuda_version = cuda_version
+        self.variant_parts = variant_parts
+        self.extra_requires = extra_requires
+        self.use_cuda_docker = use_cuda_docker
+        self.build_env_override = build_env_override
+
+    def get_all_parts(self, with_dots):
+
+        maybe_py_version = self.py_version.render_dots_or_parts(with_dots) if self.py_version else []
+        maybe_gcc_version = self.gcc_version.render_dots_or_parts(with_dots) if self.gcc_version else []
+        maybe_cuda_version = self.cuda_version.render_dots_or_parts(with_dots) if self.cuda_version else []
+
+        common_parts = [
+            "pytorch",
+            "linux",
+            "xenial",
+        ] + maybe_cuda_version + maybe_py_version + maybe_gcc_version
+
+        return common_parts + self.variant_parts
+
+    def gen_tree(self):
+
+        resource_class = "gpu.medium" if self.use_cuda_docker else "large"
+        docker_image = DOCKER_IMAGE_CUDA_10_2 if self.use_cuda_docker else DOCKER_IMAGE_BASIC
+        full_name = "_".join(self.get_all_parts(False))
+        build_env = self.build_env_override or "-".join(self.get_all_parts(True))
+
+        props_dict = {
+            "name": full_name,
+            "build_environment": build_env,
+            "requires": self.extra_requires,
+            "resource_class": resource_class,
+            "docker_image": docker_image,
+        }
+
+        if self.use_cuda_docker:
+            props_dict["use_cuda_docker_runtime"] = miniutils.quote(str(1))
+
+        return [{"pytorch_linux_test": props_dict}]
+
+
+WORKFLOW_DATA = [
+    GeConfigTestJob(
+        MultiPartVersion([3, 6], "py"),
+        MultiPartVersion([5, 4], "gcc"),
+        None,
+        ["jit_legacy", "test"],
+        ["pytorch_linux_xenial_py3_6_gcc5_4_build"]),
+    GeConfigTestJob(
+        None,
+        None,
+        CudaVersion(10, 2),
+        ["cudnn7", "py3", "jit_legacy", "test"],
+        ["pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build"],
+        use_cuda_docker=True,
+    ),
+]
+
+
+def get_workflow_jobs():
+    return [item.gen_tree() for item in WORKFLOW_DATA]
--- a/.circleci/cimodel/data/simple/ios_definitions.py
+++ b/.circleci/cimodel/data/simple/ios_definitions.py
@ -1,7 +1,7 @@
 from cimodel.data.simple.util.versions import MultiPartVersion
 import cimodel.lib.miniutils as miniutils

-XCODE_VERSION = MultiPartVersion([12, 5, 1])
+XCODE_VERSION = MultiPartVersion([12, 0, 0])


 class ArchVariant:
@ -61,20 +61,10 @@ class IOSJob:


 WORKFLOW_DATA = [
-    IOSJob(XCODE_VERSION, ArchVariant("x86_64"), is_org_member_context=False, extra_props={
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("x86_64", "full_jit"), is_org_member_context=False, extra_props={
-        "lite_interpreter": miniutils.quote(str(int(False)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("arm64"), extra_props={
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("arm64", "metal"), extra_props={
-        "use_metal": miniutils.quote(str(int(True))),
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("arm64", "full_jit"), extra_props={
-        "lite_interpreter": miniutils.quote(str(int(False)))}),
-    IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom"), extra_props={
-        "op_list": "mobilenetv2.yaml",
-        "lite_interpreter": miniutils.quote(str(int(True)))}),
+    IOSJob(XCODE_VERSION, ArchVariant("x86_64"), is_org_member_context=False),
+    IOSJob(XCODE_VERSION, ArchVariant("arm64")),
+    IOSJob(XCODE_VERSION, ArchVariant("arm64", "metal"), extra_props={"use_metal": miniutils.quote(str(int(True)))}),
+    IOSJob(XCODE_VERSION, ArchVariant("arm64", "custom"), extra_props={"op_list": "mobilenetv2.yaml"}),
 ]


--- a/.circleci/cimodel/data/simple/macos_definitions.py
+++ b/.circleci/cimodel/data/simple/macos_definitions.py
@ -1,22 +1,14 @@
 class MacOsJob:
-    def __init__(self, os_version, is_build=False, is_test=False, extra_props=tuple()):
-        # extra_props is tuple type, because mutable data structures for argument defaults
-        # is not recommended.
+    def __init__(self, os_version, is_test=False):
        self.os_version = os_version
-        self.is_build = is_build
        self.is_test = is_test
-        self.extra_props = dict(extra_props)

    def gen_tree(self):
        non_phase_parts = ["pytorch", "macos", self.os_version, "py3"]

-        extra_name_list = [name for name, exist in self.extra_props.items() if exist]
-        full_job_name_list = non_phase_parts + extra_name_list + [
-            'build' if self.is_build else None,
-            'test' if self.is_test else None,
-        ]
+        phase_name = "test" if self.is_test else "build"

-        full_job_name = "_".join(list(filter(None, full_job_name_list)))
+        full_job_name = "_".join(non_phase_parts + [phase_name])

        test_build_dependency = "_".join(non_phase_parts + ["build"])
        extra_dependencies = [test_build_dependency] if self.is_test else []
@ -29,23 +21,7 @@ class MacOsJob:
        return [{full_job_name: props_dict}]


-WORKFLOW_DATA = [
-    MacOsJob("10_15", is_build=True),
-    MacOsJob("10_13", is_build=True),
-    MacOsJob(
-        "10_13",
-        is_build=False,
-        is_test=True,
-    ),
-    MacOsJob(
-        "10_13",
-        is_build=True,
-        is_test=True,
-        extra_props=tuple({
-            "lite_interpreter": True
-        }.items()),
-    )
-]
+WORKFLOW_DATA = [MacOsJob("10_13"), MacOsJob("10_13", True)]


 def get_workflow_jobs():
--- a/.circleci/cimodel/data/simple/mobile_definitions.py
+++ b/.circleci/cimodel/data/simple/mobile_definitions.py
@ -65,12 +65,6 @@ WORKFLOW_DATA = [
        ["custom", "build", "dynamic"]
    ),

-    MobileJob(
-        DOCKER_IMAGE_NDK,
-        [DOCKER_REQUIREMENT_NDK],
-        ["custom", "build", "static"]
-    ),
-
    # Use LLVM-DEV toolchain in android-ndk-r19c docker image
    # Most of this CI is already covered by "mobile-custom-build-dynamic" job
    MobileJob(
--- a/.circleci/cimodel/data/simple/nightly_ios.py
+++ b/.circleci/cimodel/data/simple/nightly_ios.py
@ -1,5 +1,4 @@
 import cimodel.data.simple.ios_definitions as ios_definitions
-import cimodel.lib.miniutils as miniutils


 class IOSNightlyJob:
@ -44,8 +43,6 @@ class IOSNightlyJob:
            props_dict["ios_arch"] = self.variant
            props_dict["ios_platform"] = ios_definitions.get_platform(self.variant)
            props_dict["name"] = self.gen_job_name()
-            props_dict["use_metal"] = miniutils.quote(str(int(True)))
-            props_dict["use_coreml"] = miniutils.quote(str(int(True)))

        template_name = "_".join([
            "binary",
--- a/.circleci/cimodel/data/windows_build_definitions.py
+++ b/.circleci/cimodel/data/windows_build_definitions.py
@ -1,5 +1,5 @@
+import cimodel.data.simple.util.branch_filters
 import cimodel.lib.miniutils as miniutils
-from cimodel.data.simple.util.branch_filters import gen_filter_dict, RC_PATTERN, NON_PR_BRANCH_LIST
 from cimodel.data.simple.util.versions import CudaVersion


@ -10,19 +10,13 @@ class WindowsJob:
        vscode_spec,
        cuda_version,
        force_on_cpu=False,
-        multi_gpu=False,
-        master_only=False,
-        nightly_only=False,
-        master_and_nightly=False
+        master_only_pred=lambda job: job.vscode_spec.year != 2019,
    ):
        self.test_index = test_index
        self.vscode_spec = vscode_spec
        self.cuda_version = cuda_version
        self.force_on_cpu = force_on_cpu
-        self.multi_gpu = multi_gpu
-        self.master_only = master_only
-        self.nightly_only = nightly_only
-        self.master_and_nightly = master_and_nightly
+        self.master_only_pred = master_only_pred

    def gen_tree(self):

@ -31,22 +25,17 @@ class WindowsJob:
            base_phase if self.test_index is None else base_phase + str(self.test_index)
        )

-        key_parts = ["pytorch", "windows", base_phase]
-        if self.multi_gpu:
-            key_parts.append('multigpu')
-        key_name = "_".join(key_parts)
+        key_name = "_".join(["pytorch", "windows", base_phase])

        cpu_forcing_name_parts = ["on", "cpu"] if self.force_on_cpu else []

        target_arch = self.cuda_version.render_dots() if self.cuda_version else "cpu"

-        python_version = "3.8"
-
        base_name_parts = [
            "pytorch",
            "windows",
            self.vscode_spec.render(),
-            "py" + python_version.replace(".", ""),
+            "py36",
            target_arch,
        ]

@ -58,7 +47,7 @@ class WindowsJob:
            self.cudnn_version = 8 if self.cuda_version.major == 11 else 7

        arch_env_elements = (
-            ["cuda" + str(self.cuda_version.major) + "." + str(self.cuda_version.minor)]
+            ["cuda" + str(self.cuda_version.major), "cudnn" + str(self.cudnn_version)]
            if self.cuda_version
            else ["cpu"]
        )
@ -67,53 +56,40 @@ class WindowsJob:
            ["pytorch", "win"]
            + self.vscode_spec.get_elements()
            + arch_env_elements
-            + ["py" + python_version.split(".")[0]]
+            + ["py3"]
        )

        is_running_on_cuda = bool(self.cuda_version) and not self.force_on_cpu

-        if self.multi_gpu:
-            props_dict = {"requires": prerequisite_jobs}
-        else:
-            props_dict = {
-                "build_environment": build_environment_string,
-                "python_version": miniutils.quote(python_version),
-                "vs_version": miniutils.quote("16.8.6"),
-                "vc_version": miniutils.quote(self.vscode_spec.dotted_version()),
-                "vc_year": miniutils.quote(str(self.vscode_spec.year)),
-                "vc_product": self.vscode_spec.get_product(),
-                "use_cuda": miniutils.quote(str(int(is_running_on_cuda))),
-                "requires": prerequisite_jobs,
-            }
+        props_dict = {
+            "build_environment": build_environment_string,
+            "python_version": miniutils.quote("3.6"),
+            "vc_version": miniutils.quote(self.vscode_spec.dotted_version()),
+            "vc_year": miniutils.quote(str(self.vscode_spec.year)),
+            "vc_product": self.vscode_spec.get_product(),
+            "use_cuda": miniutils.quote(str(int(is_running_on_cuda))),
+            "requires": prerequisite_jobs,
+        }

-        if self.master_only:
+        if self.master_only_pred(self):
            props_dict[
                "filters"
-            ] = gen_filter_dict()
-        elif self.nightly_only:
-            props_dict[
-                "filters"
-            ] = gen_filter_dict(branches_list=["nightly"], tags_list=RC_PATTERN)
-        elif self.master_and_nightly:
-            props_dict[
-                "filters"
-            ] = gen_filter_dict(branches_list=NON_PR_BRANCH_LIST + ["nightly"], tags_list=RC_PATTERN)
+            ] = cimodel.data.simple.util.branch_filters.gen_filter_dict()

        name_parts = base_name_parts + cpu_forcing_name_parts + [numbered_phase]

-        if not self.multi_gpu:
-            if base_phase == "test":
-                test_name = "-".join(["pytorch", "windows", numbered_phase])
-                props_dict["test_name"] = test_name
+        if base_phase == "test":
+            test_name = "-".join(["pytorch", "windows", numbered_phase])
+            props_dict["test_name"] = test_name

-                if is_running_on_cuda:
-                    props_dict["executor"] = "windows-with-nvidia-gpu"
+            if is_running_on_cuda:
+                props_dict["executor"] = "windows-with-nvidia-gpu"

-            props_dict["cuda_version"] = (
-                miniutils.quote(str(self.cuda_version))
-                if self.cuda_version
-                else "cpu"
-            )
+        props_dict["cuda_version"] = (
+            miniutils.quote(str(self.cuda_version))
+            if self.cuda_version
+            else "cpu"
+        )

        props_dict["name"] = "_".join(name_parts)

@ -132,7 +108,7 @@ class VcSpec:
        return [self.prefixed_year()] + self.version_elements

    def get_product(self):
-        return "BuildTools"
+        return "Community" if self.year == 2019 else "BuildTools"

    def dotted_version(self):
        return ".".join(self.version_elements)
@ -143,16 +119,28 @@ class VcSpec:
    def render(self):
        return "_".join(self.get_elements())

+def FalsePred(_):
+    return False
+
+def TruePred(_):
+    return True
+
 _VC2019 = VcSpec(2019)

 WORKFLOW_DATA = [
-    # VS2019 CUDA-10.2
-    WindowsJob(None, _VC2019, CudaVersion(10, 2), master_only=True),
-    # VS2019 CUDA-10.2 force on cpu
-    WindowsJob(1, _VC2019, CudaVersion(10, 2), force_on_cpu=True, master_only=True),
-
-    # TODO: This test is disabled due to https://github.com/pytorch/pytorch/issues/59724
-    # WindowsJob('_azure_multi_gpu', _VC2019, CudaVersion(11, 1), multi_gpu=True, master_and_nightly=True),
+    # VS2019 CUDA-10.1
+    WindowsJob(None, _VC2019, CudaVersion(10, 1)),
+    WindowsJob(1, _VC2019, CudaVersion(10, 1)),
+    WindowsJob(2, _VC2019, CudaVersion(10, 1)),
+    # VS2019 CUDA-11.1
+    WindowsJob(None, _VC2019, CudaVersion(11, 1)),
+    WindowsJob(1, _VC2019, CudaVersion(11, 1), master_only_pred=TruePred),
+    WindowsJob(2, _VC2019, CudaVersion(11, 1), master_only_pred=TruePred),
+    # VS2019 CPU-only
+    WindowsJob(None, _VC2019, None),
+    WindowsJob(1, _VC2019, None, master_only_pred=TruePred),
+    WindowsJob(2, _VC2019, None, master_only_pred=TruePred),
+    WindowsJob(1, _VC2019, CudaVersion(10, 1), force_on_cpu=True, master_only_pred=TruePred),
 ]


--- a/.circleci/config.yml
+++ b/.circleci/config.yml
--- a/.circleci/docker/README.md
+++ b/.circleci/docker/README.md
@ -12,20 +12,8 @@ each image as the `BUILD_ENVIRONMENT` environment variable.

 See `build.sh` for valid build environments (it's the giant switch).

-Docker builds are now defined with `.circleci/cimodel/data/simple/docker_definitions.py`
-
 ## Contents

 * `build.sh` -- dispatch script to launch all builds
 * `common` -- scripts used to execute individual Docker build stages
 * `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
-
-## Usage
-
-```bash
-# Build a specific image
-./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
-
-# Set flags (see build.sh) and build image
-sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
-```
--- a/.circleci/docker/android/build.gradle
+++ b/.circleci/docker/android/build.gradle
@ -20,8 +20,10 @@ buildscript {
    }

    dependencies {
-        classpath 'com.android.tools.build:gradle:4.1.2'
-        classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2'
+        classpath 'com.android.tools.build:gradle:3.3.2'
+        classpath "com.jfrog.bintray.gradle:gradle-bintray-plugin:1.8.0"
+        classpath "com.github.dcendents:android-maven-gradle-plugin:2.1"
+        classpath "org.jfrog.buildinfo:build-info-extractor-gradle:4.9.8"
    }
 }

--- a/.circleci/docker/build.sh
+++ b/.circleci/docker/build.sh
@ -78,13 +78,11 @@ TRAVIS_DL_URL_PREFIX="https://s3.amazonaws.com/travis-python-archives/binaries/u
 case "$image" in
  pytorch-linux-xenial-py3.8)
    ANACONDA_PYTHON_VERSION=3.8
-    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    # Do not install PROTOBUF, DB, and VISION as a test
    ;;
  pytorch-linux-xenial-py3.6-gcc5.4)
    ANACONDA_PYTHON_VERSION=3.6
-    CMAKE_VERSION=3.10.3
    GCC_VERSION=5
    PROTOBUF=yes
    DB=yes
@ -93,23 +91,67 @@ case "$image" in
    ;;
  pytorch-linux-xenial-py3.6-gcc7.2)
    ANACONDA_PYTHON_VERSION=3.6
-    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    # Do not install PROTOBUF, DB, and VISION as a test
    ;;
  pytorch-linux-xenial-py3.6-gcc7)
    ANACONDA_PYTHON_VERSION=3.6
-    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
    DB=yes
    VISION=yes
    ;;
+  pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc5.4)
+    CUDA_VERSION=9.2
+    CUDNN_VERSION=7
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=5
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ;;
+  pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7)
+    CUDA_VERSION=9.2
+    CUDNN_VERSION=7
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ;;
+  pytorch-linux-xenial-cuda10-cudnn7-py3-gcc7)
+    CUDA_VERSION=10.0
+    CUDNN_VERSION=7
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    ;;
+  pytorch-linux-xenial-cuda10.1-cudnn7-py3-gcc7)
+    CUDA_VERSION=10.1
+    CUDNN_VERSION=7
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    ;;
  pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7)
    CUDA_VERSION=10.2
    CUDNN_VERSION=7
    ANACONDA_PYTHON_VERSION=3.6
-    CMAKE_VERSION=3.10.3
+    GCC_VERSION=7
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    ;;
+  pytorch-linux-xenial-cuda11.0-cudnn8-py3-gcc7)
+    CUDA_VERSION=11.0
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.6
    GCC_VERSION=7
    PROTOBUF=yes
    DB=yes
@ -120,18 +162,6 @@ case "$image" in
    CUDA_VERSION=11.1
    CUDNN_VERSION=8
    ANACONDA_PYTHON_VERSION=3.6
-    CMAKE_VERSION=3.10.3
-    GCC_VERSION=7
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    KATEX=yes
-    ;;
-  pytorch-linux-xenial-cuda11.3-cudnn8-py3-gcc7)
-    CUDA_VERSION=11.3.0 # Deviating from major.minor to conform to nvidia's Docker image names
-    CUDNN_VERSION=8
-    ANACONDA_PYTHON_VERSION=3.6
-    CMAKE_VERSION=3.10.3
    GCC_VERSION=7
    PROTOBUF=yes
    DB=yes
@ -141,15 +171,6 @@ case "$image" in
  pytorch-linux-xenial-py3-clang5-asan)
    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=5.0
-    CMAKE_VERSION=3.10.3
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    ;;
-  pytorch-linux-xenial-py3-clang7-asan)
-    ANACONDA_PYTHON_VERSION=3.6
-    CLANG_VERSION=7
-    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
    DB=yes
    VISION=yes
@ -157,7 +178,6 @@ case "$image" in
  pytorch-linux-xenial-py3-clang7-onnx)
    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=7
-    CMAKE_VERSION=3.10.3
    PROTOBUF=yes
    DB=yes
    VISION=yes
@ -165,17 +185,16 @@ case "$image" in
  pytorch-linux-xenial-py3-clang5-android-ndk-r19c)
    ANACONDA_PYTHON_VERSION=3.6
    CLANG_VERSION=5.0
-    CMAKE_VERSION=3.10.3
    LLVMDEV=yes
    PROTOBUF=yes
    ANDROID=yes
    ANDROID_NDK_VERSION=r19c
-    GRADLE_VERSION=6.8.3
+    GRADLE_VERSION=4.10.3
+    CMAKE_VERSION=3.7.0
    NINJA_VERSION=1.9.0
    ;;
  pytorch-linux-xenial-py3.6-clang7)
    ANACONDA_PYTHON_VERSION=3.6
-    CMAKE_VERSION=3.10.3
    CLANG_VERSION=7
    PROTOBUF=yes
    DB=yes
@ -187,7 +206,7 @@ case "$image" in
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    VULKAN_SDK_VERSION=1.2.162.1
+    VULKAN_SDK_VERSION=1.2.148.0
    SWIFTSHADER=yes
    ;;
  pytorch-linux-bionic-py3.8-gcc9)
@ -206,11 +225,11 @@ case "$image" in
    DB=yes
    VISION=yes
    ;;
-  pytorch-linux-bionic-cuda10.2-cudnn7-py3.9-gcc7)
+  pytorch-linux-bionic-cuda10.2-cudnn7-py3.8-gcc9)
    CUDA_VERSION=10.2
    CUDNN_VERSION=7
-    ANACONDA_PYTHON_VERSION=3.9
-    GCC_VERSION=7
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
@ -223,31 +242,51 @@ case "$image" in
    PROTOBUF=yes
    DB=yes
    VISION=yes
+    KATEX=yes
+    ;;
+  pytorch-linux-bionic-cuda11.0-cudnn8-py3.8-gcc9)
+    CUDA_VERSION=11.0
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    ;;
+  pytorch-linux-bionic-cuda11.1-cudnn8-py3.6-gcc9)
+    CUDA_VERSION=11.1
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.6
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    ;;
+  pytorch-linux-bionic-cuda11.1-cudnn8-py3.8-gcc9)
+    CUDA_VERSION=11.1
+    CUDNN_VERSION=8
+    ANACONDA_PYTHON_VERSION=3.8
+    GCC_VERSION=9
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
+    KATEX=yes
+    ;;
+  pytorch-linux-bionic-rocm3.9-py3.6)
+    ANACONDA_PYTHON_VERSION=3.6
+    PROTOBUF=yes
+    DB=yes
+    VISION=yes
    ROCM_VERSION=3.9
    ;;
-  pytorch-linux-bionic-rocm4.1-py3.6)
+  pytorch-linux-bionic-rocm3.10-py3.6)
    ANACONDA_PYTHON_VERSION=3.6
-    GCC_VERSION=9
    PROTOBUF=yes
    DB=yes
    VISION=yes
-    ROCM_VERSION=4.1
-    ;;
-  pytorch-linux-bionic-rocm4.2-py3.6)
-    ANACONDA_PYTHON_VERSION=3.6
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    ROCM_VERSION=4.2
-    ;;
-  pytorch-linux-bionic-rocm4.3.1-py3.6)
-    ANACONDA_PYTHON_VERSION=3.6
-    GCC_VERSION=9
-    PROTOBUF=yes
-    DB=yes
-    VISION=yes
-    ROCM_VERSION=4.3.1
+    ROCM_VERSION=3.10
    ;;
  *)
    # Catch-all for builds that are not hardcoded.
@ -255,9 +294,6 @@ case "$image" in
    DB=yes
    VISION=yes
    echo "image '$image' did not match an existing build configuration"
-    if [[ "$image" == *xenial* ]]; then
-      CMAKE_VERSION=3.10.3
-    fi
    if [[ "$image" == *py* ]]; then
      extract_version_from_image_name py ANACONDA_PYTHON_VERSION
    fi
@ -292,7 +328,7 @@ if [ -n "${JENKINS:-}" ]; then
  JENKINS_GID=$(id -g jenkins)
 fi

-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
+tmp_tag="tmp-$(cat /dev/urandom | tr -dc 'a-z' | fold -w 32 | head -n 1)"

 # Build image
 # TODO: build-arg THRIFT is not turned on for any image, remove it once we confirm
--- a/.circleci/docker/build_docker.sh
+++ b/.circleci/docker/build_docker.sh
@ -46,7 +46,4 @@ trap "docker logout ${registry}" EXIT
 docker push "${image}:${tag}"

 docker save -o "${IMAGE_NAME}:${tag}.tar" "${image}:${tag}"
-
-if [ -z "${DOCKER_SKIP_S3_UPLOAD:-}" ]; then
-  aws s3 cp "${IMAGE_NAME}:${tag}.tar" "s3://ossci-linux-build/pytorch/base/${IMAGE_NAME}:${tag}.tar" --acl public-read
-fi
+aws s3 cp "${IMAGE_NAME}:${tag}.tar" "s3://ossci-linux-build/pytorch/base/${IMAGE_NAME}:${tag}.tar" --acl public-read
--- a/.circleci/docker/centos-rocm/Dockerfile
+++ b/.circleci/docker/centos-rocm/Dockerfile
@ -64,7 +64,6 @@ ENV PATH /opt/rocm/hcc/bin:$PATH
 ENV PATH /opt/rocm/hip/bin:$PATH
 ENV PATH /opt/rocm/opencl/bin:$PATH
 ENV PATH /opt/rocm/llvm/bin:$PATH
-ENV MAGMA_HOME /opt/rocm/magma
 ENV LANG en_US.utf8
 ENV LC_ALL en_US.utf8

--- a/.circleci/docker/common/install_android.sh
+++ b/.circleci/docker/common/install_android.sh
@ -99,7 +99,7 @@ echo "ndk.dir=/opt/ndk" >> $GRADLE_LOCAL_PROPERTIES
 chown -R jenkins /var/lib/jenkins/gradledeps
 chgrp -R jenkins /var/lib/jenkins/gradledeps

-sudo -H -u jenkins $GRADLE_HOME/bin/gradle -Pandroid.useAndroidX=true -p /var/lib/jenkins/gradledeps -g /var/lib/jenkins/.gradle --refresh-dependencies --debug --stacktrace assemble
+sudo -H -u jenkins $GRADLE_HOME/bin/gradle -p /var/lib/jenkins/gradledeps -g /var/lib/jenkins/.gradle --refresh-dependencies --debug --stacktrace assemble

 chown -R jenkins /var/lib/jenkins/.gradle
 chgrp -R jenkins /var/lib/jenkins/.gradle
--- a/.circleci/docker/common/install_base.sh
+++ b/.circleci/docker/common/install_base.sh
@ -77,7 +77,6 @@ install_centos() {
    glog-devel \
    hiredis-devel \
    libstdc++-devel \
-    libsndfile-devel \
    make \
    opencv-devel \
    sudo \
--- a/.circleci/docker/common/install_cmake.sh
+++ b/.circleci/docker/common/install_cmake.sh
@ -4,9 +4,6 @@ set -ex

 [ -n "$CMAKE_VERSION" ]

-# Remove system cmake install so it won't get used instead
-apt-get remove cmake -y
-
 # Turn 3.6.3 into v3.6
 path=$(echo "${CMAKE_VERSION}" | sed -e 's/\([0-9].[0-9]\+\).*/v\1/')
 file="cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz"
--- a/.circleci/docker/common/install_conda.sh
+++ b/.circleci/docker/common/install_conda.sh
@ -69,31 +69,31 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  }

  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
-  # DO NOT install cmake here as it would install a version newer than 3.10, but
-  # we want to pin to version 3.10.
-  SCIPY_VERSION=1.1.0
-  if [ "$ANACONDA_PYTHON_VERSION" = "3.9" ]; then
+  # DO NOT install cmake here as it would install a version newer than 3.5, but
+  # we want to pin to version 3.5.
+  if [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
    # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
-    conda_install numpy=1.19.2 astunparse pyyaml mkl mkl-include setuptools cffi future six llvmdev=8.0.0 -c conda-forge
-    SCIPY_VERSION=1.6.0
-  elif [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
-    # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
-    conda_install numpy=1.18.5 astunparse pyyaml mkl mkl-include setuptools cffi future six llvmdev=8.0.0
+    conda_install numpy=1.18.5 pyyaml mkl mkl-include setuptools cffi future six llvmdev=8.0.0
  elif [ "$ANACONDA_PYTHON_VERSION" = "3.7" ]; then
    # DO NOT install dataclasses if installing python-3.7, since its part of python-3.7 core packages
-    conda_install numpy=1.18.5 astunparse pyyaml mkl mkl-include setuptools cffi future six typing_extensions
+    conda_install numpy=1.18.5 pyyaml mkl mkl-include setuptools cffi future six typing_extensions
  else
-    conda_install numpy=1.18.5 astunparse pyyaml mkl mkl-include setuptools cffi future six dataclasses typing_extensions
+    conda_install numpy=1.18.5 pyyaml mkl mkl-include setuptools cffi future six dataclasses typing_extensions
  fi
-
-  if [[ "$CUDA_VERSION" == 10.2* ]]; then
+  if [[ "$CUDA_VERSION" == 9.2* ]]; then
+    conda_install magma-cuda92 -c pytorch
+  elif [[ "$CUDA_VERSION" == 10.0* ]]; then
+    conda_install magma-cuda100 -c pytorch
+  elif [[ "$CUDA_VERSION" == 10.1* ]]; then
+    conda_install magma-cuda101 -c pytorch
+  elif [[ "$CUDA_VERSION" == 10.2* ]]; then
    conda_install magma-cuda102 -c pytorch
  elif [[ "$CUDA_VERSION" == 11.0* ]]; then
    conda_install magma-cuda110 -c pytorch
  elif [[ "$CUDA_VERSION" == 11.1* ]]; then
    conda_install magma-cuda111 -c pytorch
-  elif [[ "$CUDA_VERSION" == 11.3* ]]; then
-    conda_install magma-cuda113 -c pytorch
+  elif [[ "$CUDA_VERSION" == 11.2* ]]; then
+    conda_install magma-cuda112 -c pytorch
  fi

  # TODO: This isn't working atm
@ -103,27 +103,20 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  # TODO: Why is scipy pinned
  # Pin MyPy version because new errors are likely to appear with each release
  # Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
-  # Pin coverage so we can use COVERAGE_RCFILE
  as_jenkins pip install --progress-bar off pytest \
-    scipy==$SCIPY_VERSION \
+    scipy==1.1.0 \
    scikit-image \
+    librosa>=0.6.2 \
    psutil \
+    numba \
+    llvmlite \
    unittest-xml-reporting \
    boto3==1.16.34 \
-    coverage==5.5 \
+    coverage \
    hypothesis==4.53.2 \
-    expecttest==0.1.3 \
-    mypy==0.812 \
+    mypy==0.770 \
    tb-nightly

-  # Install numba only on python-3.8 or below
-  # For numba issue see https://github.com/pytorch/pytorch/issues/51511
-  if [[ $(python -c "import sys; print(int(sys.version_info < (3, 9)))") == "1" ]]; then
-    as_jenkins pip install --progress-bar off numba librosa>=0.6.2
-  else
-    as_jenkins pip install --progress-bar off numba==0.49.0 librosa>=0.6.2
-  fi
-
  # Update scikit-learn to a python-3.8 compatible version
  if [[ $(python -c "import sys; print(int(sys.version_info >= (3, 8)))") == "1" ]]; then
    as_jenkins pip install --progress-bar off -U scikit-learn
--- a/.circleci/docker/common/install_db.sh
+++ b/.circleci/docker/common/install_db.sh
@ -2,6 +2,23 @@

 set -ex

+# This function installs protobuf 2.6
+install_protobuf_26() {
+  pb_dir="/usr/temp_pb_install_dir"
+  mkdir -p $pb_dir
+
+  # On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
+  # else it will fail with
+  #   g++: error: ./../lib64/crti.o: No such file or directory
+  ln -s /usr/lib64 "$pb_dir/lib64"
+
+  curl -LO "https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz"
+  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-2.6.1.tar.gz
+  pushd "$pb_dir" && ./configure && make && make check && sudo make install && sudo ldconfig
+  popd
+  rm -rf $pb_dir
+}
+
 install_ubuntu() {
  apt-get update
  apt-get install -y --no-install-recommends \
--- a/.circleci/docker/common/install_nccl.sh
+++ b/.circleci/docker/common/install_nccl.sh
@ -0,0 +1,4 @@
+#!/bin/bash
+
+sudo apt-get -qq update
+sudo apt-get -qq install --allow-downgrades --allow-change-held-packages libnccl-dev=2.5.6-1+cuda10.1 libnccl2=2.5.6-1+cuda10.1
--- a/.circleci/docker/common/install_openmpi.sh
+++ b/.circleci/docker/common/install_openmpi.sh
@ -1,10 +1,4 @@
 #!/bin/bash

 sudo apt-get update
-# also install ssh to avoid error of:
-# --------------------------------------------------------------------------
-# The value of the MCA parameter "plm_rsh_agent" was set to a path
-# that could not be found:
-#   plm_rsh_agent: ssh : rsh
-sudo apt-get install -y ssh
 sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
--- a/.circleci/docker/common/install_openssl.sh
+++ b/.circleci/docker/common/install_openssl.sh
@ -1,14 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-OPENSSL=openssl-1.1.1k
-
-wget -q -O "${OPENSSL}.tar.gz" "https://www.openssl.org/source/${OPENSSL}.tar.gz"
-tar xf "${OPENSSL}.tar.gz"
-cd "${OPENSSL}"
-./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
-# NOTE: openssl errors out when built with the -j option
-make install_sw
-cd ..
-rm -rf "${OPENSSL}"
--- a/.circleci/docker/common/install_protobuf.sh
+++ b/.circleci/docker/common/install_protobuf.sh
@ -2,8 +2,8 @@

 set -ex

-# This function installs protobuf 3.17
-install_protobuf_317() {
+# This function installs protobuf 2.6
+install_protobuf_26() {
  pb_dir="/usr/temp_pb_install_dir"
  mkdir -p $pb_dir

@ -12,32 +12,37 @@ install_protobuf_317() {
  #   g++: error: ./../lib64/crti.o: No such file or directory
  ln -s /usr/lib64 "$pb_dir/lib64"

-  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz"
-  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
-  # -j2 to balance memory usage and speed.
-  # naked `-j` seems to use too much memory.
-  pushd "$pb_dir" && ./configure && make -j2 && make -j2 check && sudo make -j2 install && sudo ldconfig
+  curl -LO "https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz"
+  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-2.6.1.tar.gz
+  pushd "$pb_dir" && ./configure && make && make check && sudo make install && sudo ldconfig
  popd
  rm -rf $pb_dir
 }

 install_ubuntu() {
-  # Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
+  # Ubuntu 14.04 ships with protobuf 2.5, but ONNX needs protobuf >= 2.6
+  # so we install that here if on 14.04
+  # Ubuntu 14.04 also has cmake 2.8.12 as the default option, so we will
  # install cmake3 here and use cmake3.
  apt-get update
  if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
    apt-get install -y --no-install-recommends cmake3
+    install_protobuf_26
+  else
+    apt-get install -y --no-install-recommends \
+            libprotobuf-dev \
+            protobuf-compiler
  fi

  # Cleanup
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
-  install_protobuf_317
 }

 install_centos() {
-  install_protobuf_317
+  # Centos7 ships with protobuf 2.5, but ONNX needs protobuf >= 2.6
+  # so we always install that here
+  install_protobuf_26
 }

 # Install base packages depending on the base OS
--- a/.circleci/docker/common/install_rocm.sh
+++ b/.circleci/docker/common/install_rocm.sh
@ -4,19 +4,12 @@ set -ex

 install_magma() {
    # "install" hipMAGMA into /opt/rocm/magma by copying after build
-    git clone https://bitbucket.org/icl/magma.git -b magma_ctrl_launch_bounds
+    git clone https://bitbucket.org/icl/magma.git -b hipMAGMA
    pushd magma
-    # The branch "magma_ctrl_launch_bounds" contains a fix on top of the commit below, so the checkout is kept commented out for reference.
-    #git checkout 878b1ce02e9cfe4a829be22c8f911e9c0b6bd88f
-    # Work around non-ASCII characters in certain magma sources; remove this after upstream magma fixes this.
-    perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zfree.cpp
-    perl -i.bak -pe 's/[^[:ascii:]]//g' sparse/control/magma_zsolverinfo.cpp
-    cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
+    cp make.inc-examples/make.inc.hip-mkl-gcc make.inc
    echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
    echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib' >> make.inc
-    echo 'DEVCCFLAGS += --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 --gpu-max-threads-per-block=256' >> make.inc
-    # hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
-    sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
+    echo 'DEVCCFLAGS += --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908' >> make.inc
    export PATH="${PATH}:/opt/rocm/bin"
    make -f make.gen.hipMAGMA -j $(nproc)
    make lib/libmagma.so -j $(nproc) MKLROOT=/opt/conda
@ -25,10 +18,6 @@ install_magma() {
    mv magma /opt/rocm
 }

-ver() {
-    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
-}
-
 install_ubuntu() {
    apt-get update
    if [[ $UBUNTU_VERSION == 18.04 ]]; then
@ -42,14 +31,9 @@ install_ubuntu() {
    apt-get install -y libc++1
    apt-get install -y libc++abi1

-    ROCM_REPO="ubuntu"
-    if [[ $(ver $ROCM_VERSION) -lt $(ver 4.2) ]]; then
-        ROCM_REPO="xenial"
-    fi
-
    # Add rocm repository
    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
-    echo "deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION} ${ROCM_REPO} main" > /etc/apt/sources.list.d/rocm.list
+    echo "deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION} xenial main" > /etc/apt/sources.list.d/rocm.list
    apt-get update --allow-insecure-repositories

    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
--- a/.circleci/docker/common/install_vision.sh
+++ b/.circleci/docker/common/install_vision.sh
@ -2,6 +2,23 @@

 set -ex

+# This function installs protobuf 2.6
+install_protobuf_26() {
+  pb_dir="/usr/temp_pb_install_dir"
+  mkdir -p $pb_dir
+
+  # On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
+  # else it will fail with
+  #   g++: error: ./../lib64/crti.o: No such file or directory
+  ln -s /usr/lib64 "$pb_dir/lib64"
+
+  curl -LO "https://github.com/google/protobuf/releases/download/v2.6.1/protobuf-2.6.1.tar.gz"
+  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-2.6.1.tar.gz
+  pushd "$pb_dir" && ./configure && make && make check && sudo make install && sudo ldconfig
+  popd
+  rm -rf $pb_dir
+}
+
 install_ubuntu() {
  apt-get update
  apt-get install -y --no-install-recommends \
--- a/.circleci/docker/common/install_vulkan_sdk.sh
+++ b/.circleci/docker/common/install_vulkan_sdk.sh
@ -8,17 +8,16 @@ retry () {
    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
 }

+_https_amazon_aws=https://ossci-android.s3.amazonaws.com
+
 _vulkansdk_dir=/var/lib/jenkins/vulkansdk
+mkdir -p $_vulkansdk_dir
 _tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz
+curl --silent --show-error --location --fail --retry 3 \
+  --output "$_tmp_vulkansdk_targz" "$_https_amazon_aws/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"

-curl \
-  --silent \
-  --show-error \
-  --location \
-  --fail \
-  --retry 3 \
-  --output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
+tar -C "$_vulkansdk_dir" -xzf "$_tmp_vulkansdk_targz" --strip-components 1

-mkdir -p "${_vulkansdk_dir}"
-tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
-rm -rf "${_tmp_vulkansdk_targz}"
+export VULKAN_SDK="$_vulkansdk_dir/"
+
+rm "$_tmp_vulkansdk_targz"
--- a/.circleci/docker/ubuntu-cuda/Dockerfile
+++ b/.circleci/docker/ubuntu-cuda/Dockerfile
@ -61,16 +61,6 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
 RUN rm install_vision.sh
 ENV INSTALLED_VISION ${VISION}

-ADD ./common/install_openssl.sh install_openssl.sh
-ENV OPENSSL_ROOT_DIR /opt/openssl
-RUN bash ./install_openssl.sh
-
-# (optional) Install non-default CMake version
-ARG CMAKE_VERSION
-ADD ./common/install_cmake.sh install_cmake.sh
-RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
-RUN rm install_cmake.sh
-
 # Install ccache/sccache (do this last, so we get priority in PATH)
 ADD ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
@ -82,6 +72,11 @@ ADD ./common/install_jni.sh install_jni.sh
 ADD ./java/jni.h jni.h
 RUN bash ./install_jni.sh && rm install_jni.sh

+# Install NCCL for when CUDA is version 10.1
+ADD ./common/install_nccl.sh install_nccl.sh
+RUN if [ "${CUDA_VERSION}" = 10.1 ]; then bash ./install_nccl.sh; fi
+RUN rm install_nccl.sh
+
 # Install Open MPI for CUDA
 ADD ./common/install_openmpi.sh install_openmpi.sh
 RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
--- a/.circleci/docker/ubuntu-rocm/Dockerfile
+++ b/.circleci/docker/ubuntu-rocm/Dockerfile
@ -27,11 +27,6 @@ ARG ANACONDA_PYTHON_VERSION
 ADD ./common/install_conda.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh

-# Install gcc
-ARG GCC_VERSION
-ADD ./common/install_gcc.sh install_gcc.sh
-RUN bash ./install_gcc.sh && rm install_gcc.sh
-
 # (optional) Install protobuf for ONNX
 ARG PROTOBUF
 ADD ./common/install_protobuf.sh install_protobuf.sh
--- a/.circleci/docker/ubuntu/Dockerfile
+++ b/.circleci/docker/ubuntu/Dockerfile
@ -106,10 +106,6 @@ ADD ./common/install_ninja.sh install_ninja.sh
 RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
 RUN rm install_ninja.sh

-ADD ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh
-ENV OPENSSL_ROOT_DIR /opt/openssl
-
 # Install ccache/sccache (do this last, so we get priority in PATH)
 ADD ./common/install_cache.sh install_cache.sh
 ENV PATH /opt/cache/bin:$PATH
--- a/.circleci/ecr_gc_docker/Dockerfile
+++ b/.circleci/ecr_gc_docker/Dockerfile
@ -1,10 +1,10 @@
-FROM ubuntu:18.04
+FROM ubuntu:16.04

-RUN apt-get update && apt-get install -y python3-pip git && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log
+RUN apt-get update && apt-get install -y python-pip git && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log

 ADD requirements.txt /requirements.txt

-RUN pip3 install -r /requirements.txt
+RUN pip install -r /requirements.txt

 ADD gc.py /usr/bin/gc.py

--- a/.circleci/ecr_gc_docker/docker_hub.py
+++ b/.circleci/ecr_gc_docker/docker_hub.py
@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python

 from collections import namedtuple

--- a/.circleci/ecr_gc_docker/gc.py
+++ b/.circleci/ecr_gc_docker/gc.py
@ -1,11 +1,11 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python

 import argparse
-import boto3
 import datetime
+import boto3
 import pytz
-import re
 import sys
+import re


 def save_to_s3(project, data):
@ -148,12 +148,9 @@ def chunks(chunkable, n):
    """ Yield successive n-sized chunks from l.
    """
    for i in range(0, len(chunkable), n):
-        yield chunkable[i: i + n]
-
+        yield chunkable[i : i + n]

 SHA_PATTERN = re.compile(r'^[0-9a-f]{40}$')
-
-
 def looks_like_git_sha(tag):
    """Returns a boolean to check if a tag looks like a git sha

@ -162,7 +159,6 @@ def looks_like_git_sha(tag):
    """
    return re.match(SHA_PATTERN, tag) is not None

-
 stable_window_tags = []
 for repo in repos(client):
    repositoryName = repo["repositoryName"]
--- a/.circleci/generate_config_yml.py
+++ b/.circleci/generate_config_yml.py
@ -13,8 +13,10 @@ from collections import namedtuple
 import cimodel.data.binary_build_definitions as binary_build_definitions
 import cimodel.data.pytorch_build_definitions as pytorch_build_definitions
 import cimodel.data.simple.android_definitions
+import cimodel.data.simple.bazel_definitions
 import cimodel.data.simple.binary_smoketest
 import cimodel.data.simple.docker_definitions
+import cimodel.data.simple.ge_config_tests
 import cimodel.data.simple.ios_definitions
 import cimodel.data.simple.macos_definitions
 import cimodel.data.simple.mobile_definitions
@ -78,52 +80,6 @@ class Header(object):
        for line in filter(None, lines):
            output_filehandle.write(line + "\n")

-def filter_master_only_jobs(items):
-    def _for_all_items(items, functor) -> None:
-        if isinstance(items, list):
-            for item in items:
-                _for_all_items(item, functor)
-        if isinstance(items, dict) and len(items) == 1:
-            item_type, item = next(iter(items.items()))
-            functor(item_type, item)
-
-    def _is_master_item(item):
-        filters = item.get('filters', None)
-        branches = filters.get('branches', None) if filters is not None else None
-        branches_only = branches.get('only', None) if branches is not None else None
-        return 'master' in branches_only if branches_only is not None else False
-
-    master_deps = set()
-
-    def _save_requires_if_master(item_type, item):
-        requires = item.get('requires', None)
-        item_name = item.get("name", None)
-        if not isinstance(requires, list):
-            return
-        if _is_master_item(item) or item_name in master_deps:
-            master_deps.update([n.strip('"') for n in requires])
-
-    def _do_filtering(items):
-        if isinstance(items, list):
-            rc = [_do_filtering(item) for item in items]
-            return [item for item in rc if len(item if item is not None else []) > 0]
-        assert isinstance(items, dict) and len(items) == 1
-        item_type, item = next(iter(items.items()))
-        item_name = item.get("name", None)
-        item_name = item_name.strip('"') if item_name is not None else None
-        if not _is_master_item(item) and item_name not in master_deps:
-            return None
-        if 'filters' in item:
-            item = item.copy()
-            item.pop('filters')
-        return {item_type: item}
-
-    # Scan of dependencies twice to pick up nested required jobs
-    # I.e. jobs depending on jobs that master-only job depend on
-    _for_all_items(items, _save_requires_if_master)
-    _for_all_items(items, _save_requires_if_master)
-    return _do_filtering(items)
-

 def gen_build_workflows_tree():
    build_workflows_functions = [
@ -133,6 +89,8 @@ def gen_build_workflows_tree():
        cimodel.data.simple.android_definitions.get_workflow_jobs,
        cimodel.data.simple.ios_definitions.get_workflow_jobs,
        cimodel.data.simple.mobile_definitions.get_workflow_jobs,
+        cimodel.data.simple.ge_config_tests.get_workflow_jobs,
+        cimodel.data.simple.bazel_definitions.get_workflow_jobs,
        cimodel.data.simple.binary_smoketest.get_workflow_jobs,
        cimodel.data.simple.nightly_ios.get_workflow_jobs,
        cimodel.data.simple.nightly_android.get_workflow_jobs,
@ -141,8 +99,6 @@ def gen_build_workflows_tree():
        binary_build_definitions.get_post_upload_jobs,
        binary_build_definitions.get_binary_smoke_test_jobs,
    ]
-    build_jobs = [f() for f in build_workflows_functions]
-    master_build_jobs = filter_master_only_jobs(build_jobs)

    binary_build_functions = [
        binary_build_definitions.get_binary_build_jobs,
@ -150,11 +106,6 @@ def gen_build_workflows_tree():
        binary_build_definitions.get_nightly_uploads,
    ]

-    slow_gradcheck_jobs = [
-        pytorch_build_definitions.get_workflow_jobs,
-        cimodel.data.simple.docker_definitions.get_workflow_jobs,
-    ]
-
    return {
        "workflows": {
            "binary_builds": {
@ -163,15 +114,7 @@ def gen_build_workflows_tree():
            },
            "build": {
                "when": r"<< pipeline.parameters.run_build >>",
-                "jobs": build_jobs,
-            },
-            "master_build": {
-                "when": r"<< pipeline.parameters.run_master_build >>",
-                "jobs": master_build_jobs,
-            },
-            "slow_gradcheck_build": {
-                "when": r"<< pipeline.parameters.run_slow_gradcheck_build >>",
-                "jobs": [f(only_slow_gradcheck=True) for f in slow_gradcheck_jobs],
+                "jobs": [f() for f in build_workflows_functions]
            },
        }
    }
@ -196,7 +139,6 @@ YAML_SOURCES = [
    File("job-specs/docker_jobs.yml"),
    Header("Workflows"),
    Treegen(gen_build_workflows_tree, 0),
-    File("workflows/workflows-scheduled-ci.yml"),
    File("workflows/workflows-ecr-gc.yml"),
    File("workflows/workflows-promote.yml"),
 ]
--- a/.circleci/regenerate.ps1
+++ b/.circleci/regenerate.ps1
@ -1,5 +0,0 @@
-cd $PSScriptRoot;
-$NewFile = New-TemporaryFile;
-python generate_config_yml.py > $NewFile.name
-(Get-Content $NewFile.name -Raw).TrimEnd().Replace("`r`n","`n") | Set-Content config.yml -Force
-Remove-Item $NewFile.name
--- a/.circleci/regenerate.sh
+++ b/.circleci/regenerate.sh
@ -1,17 +1,8 @@
-#!/bin/bash -e
+#!/bin/bash -xe

 # Allows this script to be invoked from any directory:
-cd "$(dirname "$0")"
-
-UNCOMMIT_CHANGE=$(git status -s | grep " config.yml" | wc -l | xargs)
-if [[ $UNCOMMIT_CHANGE != 0 ]]; then
-    OLD_FILE=$(mktemp)
-    cp config.yml "$OLD_FILE"
-    echo "Uncommitted change detected in .circleci/config.yml"
-    echo "It has been backed up to $OLD_FILE"
-fi
+cd $(dirname "$0")

 NEW_FILE=$(mktemp)
-./generate_config_yml.py > "$NEW_FILE"
-cp "$NEW_FILE" config.yml
-echo "New config generated in .circleci/config.yml"
+./generate_config_yml.py > $NEW_FILE
+cp $NEW_FILE config.yml
--- a/.circleci/scripts/binary_checkout.sh
+++ b/.circleci/scripts/binary_checkout.sh
@ -55,13 +55,13 @@ else
  echo "Can't tell what to checkout"
  exit 1
 fi
-retry git submodule update --init --recursive --jobs 0
+retry git submodule update --init --recursive
 echo "Using Pytorch from "
 git --no-pager log --max-count 1
 popd

 # Clone the Builder master repo
-retry git clone -q https://github.com/pytorch/builder.git -b release/1.10 "$BUILDER_ROOT"
+retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
 pushd "$BUILDER_ROOT"
 echo "Using builder from "
 git --no-pager log --max-count 1
--- a/.circleci/scripts/binary_ios_build.sh
+++ b/.circleci/scripts/binary_ios_build.sh
@ -15,14 +15,14 @@ export PATH="~/anaconda/bin:${PATH}"
 source ~/anaconda/bin/activate

 # Install dependencies
-conda install numpy ninja pyyaml mkl mkl-include setuptools cmake cffi requests typing_extensions --yes
+conda install numpy ninja pyyaml mkl mkl-include setuptools cmake cffi requests --yes
 conda install -c conda-forge valgrind --yes
 export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}

 # sync submodules
 cd ${PROJ_ROOT}
 git submodule sync
-git submodule update --init --recursive --jobs 0
+git submodule update --init --recursive

 # run build script
 chmod a+x ${PROJ_ROOT}/scripts/build_ios.sh
@ -31,12 +31,8 @@ cat ${PROJ_ROOT}/scripts/build_ios.sh
 echo "########################################################"
 echo "IOS_ARCH: ${IOS_ARCH}"
 echo "IOS_PLATFORM: ${IOS_PLATFORM}"
-echo "USE_PYTORCH_METAL: ${USE_PYTORCH_METAL}"
-echo "USE_COREML_DELEGATE: ${USE_COREML_DELEGATE}"
 export IOS_ARCH=${IOS_ARCH}
 export IOS_PLATFORM=${IOS_PLATFORM}
-export USE_PYTORCH_METAL=${USE_PYTORCH_METAL}
-export USE_COREML_DELEGATE=${USE_COREML_DELEGATE}
 unbuffer ${PROJ_ROOT}/scripts/build_ios.sh 2>&1 | ts

 #store the binary
--- a/.circleci/scripts/binary_ios_test.sh
+++ b/.circleci/scripts/binary_ios_test.sh
@ -8,23 +8,22 @@ cd ${PROJ_ROOT}/ios/TestApp
 # install fastlane
 sudo gem install bundler && bundle install
 # install certificates
-echo "${IOS_CERT_KEY_2022}" >> cert.txt
+echo "${IOS_CERT_KEY}" >> cert.txt
 base64 --decode cert.txt -o Certificates.p12
 rm cert.txt
-bundle exec fastlane install_root_cert
-bundle exec fastlane install_dev_cert
+bundle exec fastlane install_cert
 # install the provisioning profile
-PROFILE=PyTorch_CI_2022.mobileprovision
+PROFILE=PyTorch_CI_2021.mobileprovision
 PROVISIONING_PROFILES=~/Library/MobileDevice/Provisioning\ Profiles
 mkdir -pv "${PROVISIONING_PROFILES}"
 cd "${PROVISIONING_PROFILES}"
-echo "${IOS_SIGN_KEY_2022}" >> cert.txt
+echo "${IOS_SIGN_KEY}" >> cert.txt
 base64 --decode cert.txt -o ${PROFILE}
 rm cert.txt
 # run the ruby build script
 if ! [ -x "$(command -v xcodebuild)" ]; then
    echo 'Error: xcodebuild is not installed.'
    exit 1
-fi
-PROFILE=PyTorch_CI_2022
-ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID} -f Accelerate,MetalPerformanceShaders,CoreML
+fi 
+PROFILE=PyTorch_CI_2021
+ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}
--- a/.circleci/scripts/binary_ios_upload.sh
+++ b/.circleci/scripts/binary_ios_upload.sh
@ -24,17 +24,14 @@ do
 done
 lipo -i ${ZIP_DIR}/install/lib/*.a
 # copy the umbrella header and license
-cp ${PROJ_ROOT}/ios/LibTorch-Lite.h ${ZIP_DIR}/src/
+cp ${PROJ_ROOT}/ios/LibTorch.h ${ZIP_DIR}/src/
 cp ${PROJ_ROOT}/LICENSE ${ZIP_DIR}/
 # zip the library
-export DATE="$(date -u +%Y%m%d)"
-export IOS_NIGHTLY_BUILD_VERSION="1.10.0.${DATE}"
-# libtorch_lite_ios_nightly_1.10.0.20210810.zip
-ZIPFILE="libtorch_lite_ios_nightly_${IOS_NIGHTLY_BUILD_VERSION}.zip"
+ZIPFILE=libtorch_ios_nightly_build.zip
 cd ${ZIP_DIR}
 #for testing
 touch version.txt
-echo "${IOS_NIGHTLY_BUILD_VERSION}" > version.txt
+echo $(date +%s) > version.txt
 zip -r ${ZIPFILE} install src version.txt LICENSE
 # upload to aws
 # Install conda then 'conda install' awscli
@ -51,14 +48,3 @@ set +x
 # echo "AWS KEY: ${AWS_ACCESS_KEY_ID}"
 # echo "AWS SECRET: ${AWS_SECRET_ACCESS_KEY}"
 aws s3 cp ${ZIPFILE} s3://ossci-ios-build/ --acl public-read
-
-# create a new LibTorch-Lite-Nightly.podspec from the template
-echo "cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec"
-cp ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec.template ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
-
-# update pod version
-sed -i '' -e "s/IOS_NIGHTLY_BUILD_VERSION/${IOS_NIGHTLY_BUILD_VERSION}/g" ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
-cat ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
-
-# push the new LibTorch-Lite-Nightly.podspec to CocoaPods
-pod trunk push --verbose --allow-warnings --use-libraries --skip-import-validation ${PROJ_ROOT}/ios/LibTorch-Lite-Nightly.podspec
--- a/.circleci/scripts/binary_linux_build.sh
+++ b/.circleci/scripts/binary_linux_build.sh
@ -4,14 +4,10 @@ echo "RUNNING ON $(uname -a) WITH $(nproc) CPUS AND $(free -m)"
 set -eux -o pipefail
 source /env

-# Because most Circle executors only have 20 CPUs, using more causes OOMs w/ Ninja and nvcc parallelization
-MEMORY_LIMIT_MAX_JOBS=18
-NUM_CPUS=$(( $(nproc) - 2 ))
+# Defaults here so they can be changed in one place
+export MAX_JOBS=${MAX_JOBS:-$(( $(nproc) - 2 ))}

-# Defaults here for **binary** linux builds so they can be changed in one place
-export MAX_JOBS=${MAX_JOBS:-$(( ${NUM_CPUS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${NUM_CPUS} ))}
-
-if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
+if [[ "${DESIRED_CUDA}" == "cu111" ]]; then
  export BUILD_SPLIT_CUDA="ON"
 fi

@ -26,9 +22,5 @@ else
  build_script='manywheel/build.sh'
 fi

-if [[ "$CIRCLE_BRANCH" == "master" ]] || [[ "$CIRCLE_BRANCH" == release/* ]]; then
-  export BUILD_DEBUG_INFO=1
-fi
-
 # Build the package
 SKIP_ALL_TESTS=1 "/builder/$build_script"
--- a/.circleci/scripts/binary_linux_test.sh
+++ b/.circleci/scripts/binary_linux_test.sh
@ -9,6 +9,10 @@ python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)"

 # Set up Python
 if [[ "$PACKAGE_TYPE" == conda ]]; then
+  # There was a bug that was introduced in conda-package-handling >= 1.6.1 that makes archives
+  # above a certain size fail out when attempting to extract
+  # see: https://github.com/conda/conda-package-handling/issues/71
+  conda install -y conda-package-handling=1.6.0
  retry conda create -qyn testenv python="$DESIRED_PYTHON"
  source activate testenv >/dev/null
 elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
@ -34,10 +38,6 @@ if [[ "$DESIRED_CUDA" == "cu112" ]]; then
  EXTRA_CONDA_FLAGS="-c=conda-forge"
 fi

-# Move debug wheels out of the package dir so they don't get installed
-mkdir -p /tmp/debug_final_pkgs
-mv /final_pkgs/debug-*.zip /tmp/debug_final_pkgs || echo "no debug packages to move"
-
 # Install the package
 # These network calls should not have 'retry's because they are installing
 # locally and aren't actually network calls
--- a/.circleci/scripts/binary_macos_build.sh
+++ b/.circleci/scripts/binary_macos_build.sh
@ -14,10 +14,6 @@ chmod +x "$build_script"
 # Build
 cat >"$build_script" <<EOL
 export PATH="$workdir/miniconda/bin:$PATH"
-if [[ "$CIRCLE_BRANCH" == "nightly" ]]; then
-  export USE_PYTORCH_METAL_EXPORT=1
-  export USE_COREML_DELEGATE=1
-fi
 if [[ "$PACKAGE_TYPE" == conda ]]; then
  "$workdir/builder/conda/build_pytorch.sh"
 else
--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@ -62,30 +62,18 @@ if [[ -z "$DOCKER_IMAGE" ]]; then
  if [[ "$PACKAGE_TYPE" == conda ]]; then
    export DOCKER_IMAGE="pytorch/conda-cuda"
  elif [[ "$DESIRED_CUDA" == cpu ]]; then
-    export DOCKER_IMAGE="pytorch/manylinux-cpu"
+    export DOCKER_IMAGE="pytorch/manylinux-cuda100"
  else
    export DOCKER_IMAGE="pytorch/manylinux-cuda${DESIRED_CUDA:2}"
  fi
 fi

-USE_GOLD_LINKER="OFF"
-# GOLD linker can not be used if CUPTI is statically linked into PyTorch, see https://github.com/pytorch/pytorch/issues/57744
-if [[ ${DESIRED_CUDA} == "cpu" ]]; then
-  USE_GOLD_LINKER="ON"
-fi
-
-USE_WHOLE_CUDNN="OFF"
-# Link whole cuDNN for CUDA-11.1 to include fp16 fast kernels
-if [[  "$(uname)" == "Linux" && "${DESIRED_CUDA}" == "cu111" ]]; then
-  USE_WHOLE_CUDNN="ON"
-fi
-
 # Default to nightly, since that's where this normally uploads to
 PIP_UPLOAD_FOLDER='nightly/'
 # We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
 export DATE="$(date -u +%Y%m%d)"
 #TODO: We should be pulling semver version from the base version.txt
-BASE_BUILD_VERSION="1.10.0.dev$DATE"
+BASE_BUILD_VERSION="1.8.0.dev$DATE"
 # Change BASE_BUILD_VERSION to git tag when on a git tag
 # Use 'git -C' to make doubly sure we're in the correct directory for checking
 # the git tag
@ -148,7 +136,7 @@ if [[ "${BUILD_FOR_SYSTEM:-}" == "windows" ]]; then
 fi

 export DATE="$DATE"
-export NIGHTLIES_DATE_PREAMBLE=1.10.0.dev
+export NIGHTLIES_DATE_PREAMBLE=1.8.0.dev
 export PYTORCH_BUILD_VERSION="$PYTORCH_BUILD_VERSION"
 export PYTORCH_BUILD_NUMBER="$PYTORCH_BUILD_NUMBER"
 export OVERRIDE_PACKAGE_VERSION="$PYTORCH_BUILD_VERSION"
@ -180,10 +168,6 @@ export CIRCLE_SHA1="$CIRCLE_SHA1"
 export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
 export CIRCLE_BRANCH="$CIRCLE_BRANCH"
 export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
-
-export USE_GOLD_LINKER="${USE_GOLD_LINKER}"
-export USE_GLOO_WITH_OPENSSL="ON"
-export USE_WHOLE_CUDNN="${USE_WHOLE_CUDNN}"
 # =================== The above code will be executed inside Docker container ===================
 EOL

--- a/.circleci/scripts/binary_windows_build.sh
+++ b/.circleci/scripts/binary_windows_build.sh
@ -8,45 +8,15 @@ export CUDA_VERSION="${DESIRED_CUDA/cu/}"
 export USE_SCCACHE=1
 export SCCACHE_BUCKET=ossci-compiler-cache-windows
 export NIGHTLIES_PYTORCH_ROOT="$PYTORCH_ROOT"
-export VC_YEAR=2019

-if [[ "${DESIRED_CUDA}" == "cu111" || "${DESIRED_CUDA}" == "cu113" ]]; then
-    export BUILD_SPLIT_CUDA="ON"
+if [[ "$CUDA_VERSION" == "92" || "$CUDA_VERSION" == "100" ]]; then
+  export VC_YEAR=2017
+else
+  export VC_YEAR=2019
 fi

-echo "Free Space for CUDA DEBUG BUILD"
-if [[ "$CIRCLECI" == 'true' ]]; then
-    if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community" ]]; then
-        rm -rf "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community"
-    fi
-
-    if [[ -d "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0" ]]; then
-        rm -rf "C:\\Program Files (x86)\\Microsoft Visual Studio 14.0"
-    fi
-
-    if [[ -d "C:\\Program Files (x86)\\Microsoft.NET" ]]; then
-        rm -rf "C:\\Program Files (x86)\\Microsoft.NET"
-    fi
-
-    if [[ -d "C:\\Program Files\\dotnet" ]]; then
-        rm -rf "C:\\Program Files\\dotnet"
-    fi
-
-    if [[ -d "C:\\Program Files (x86)\\dotnet" ]]; then
-        rm -rf "C:\\Program Files (x86)\\dotnet"
-    fi
-
-    if [[ -d "C:\\Program Files (x86)\\Microsoft SQL Server" ]]; then
-        rm -rf "C:\\Program Files (x86)\\Microsoft SQL Server"
-    fi
-
-    if [[ -d "C:\\Program Files (x86)\\Xamarin" ]]; then
-        rm -rf "C:\\Program Files (x86)\\Xamarin"
-    fi
-
-    if [[ -d "C:\\Program Files (x86)\\Google" ]]; then
-        rm -rf "C:\\Program Files (x86)\\Google"
-    fi
+if [[ "${DESIRED_CUDA}" == "cu111" ]]; then
+  export BUILD_SPLIT_CUDA="ON"
 fi

 set +x
@ -61,11 +31,6 @@ if [[ "$CIRCLECI" == 'true' && -d "C:\\ProgramData\\Microsoft\\VisualStudio\\Pac
  mv _Instances "C:\\ProgramData\\Microsoft\\VisualStudio\\Packages"
 fi

-if [[ "$CIRCLECI" == 'true' && -d "C:\\Microsoft" ]]; then
-  # don't use quotes here
-  rm -rf /c/Microsoft/AndroidNDK*
-fi
-
 echo "Free space on filesystem before build:"
 df -h

--- a/.circleci/scripts/binary_windows_test.sh
+++ b/.circleci/scripts/binary_windows_test.sh
@ -4,7 +4,13 @@ set -eux -o pipefail
 source "/c/w/env"

 export CUDA_VERSION="${DESIRED_CUDA/cu/}"
-export VC_YEAR=2019
+export VC_YEAR=2017
+
+if [[ "$CUDA_VERSION" == "92" || "$CUDA_VERSION" == "100" ]]; then
+  export VC_YEAR=2017
+else
+  export VC_YEAR=2019
+fi

 pushd "$BUILDER_ROOT"

--- a/.circleci/scripts/build_android_gradle.sh
+++ b/.circleci/scripts/build_android_gradle.sh
@ -10,7 +10,7 @@ export ANDROID_HOME=/opt/android/sdk

 # Must be in sync with GRADLE_VERSION in docker image for android
 # https://github.com/pietern/pytorch-dockerfiles/blob/master/build.sh#L155
-export GRADLE_VERSION=6.8.3
+export GRADLE_VERSION=4.10.3
 export GRADLE_HOME=/opt/gradle/gradle-$GRADLE_VERSION
 export GRADLE_PATH=$GRADLE_HOME/bin/gradle

--- a/.circleci/scripts/cpp_doc_push_script.sh
+++ b/.circleci/scripts/cpp_doc_push_script.sh
@ -10,27 +10,18 @@ pt_checkout="/var/lib/jenkins/workspace"
 # Since we're cat-ing this file, we need to escape all $'s
 echo "cpp_doc_push_script.sh: Invoked with $*"

-# for statements like ${1:-${DOCS_INSTALL_PATH:-docs/}}
-# the order of operations goes:
-#   1. Check if there's an argument $1
-#   2. If no argument check for environment var DOCS_INSTALL_PATH
-#   3. If no environment var fall back to default 'docs/'
-
-# NOTE: It might seem weird to gather the second argument before gathering the first argument
-#       but since DOCS_INSTALL_PATH can be derived from DOCS_VERSION it's probably better to
-#       try and gather it first, just so we don't potentially break people who rely on this script
-# Argument 2: What version of the Python API docs we are building.
-version="${2:-${DOCS_VERSION:-master}}"
-if [ -z "$version" ]; then
-echo "error: cpp_doc_push_script.sh: version (arg2) not specified"
+# Argument 1: Where to copy the built documentation for Python API to
+# (pytorch.github.io/$install_path)
+install_path="$1"
+if [ -z "$install_path" ]; then
+echo "error: cpp_doc_push_script.sh: install_path (arg1) not specified"
  exit 1
 fi

-# Argument 1: Where to copy the built documentation for Python API to
-# (pytorch.github.io/$install_path)
-install_path="${1:-${DOCS_INSTALL_PATH:-docs/${DOCS_VERSION}}}"
-if [ -z "$install_path" ]; then
-echo "error: cpp_doc_push_script.sh: install_path (arg1) not specified"
+# Argument 2: What version of the Python API docs we are building.
+version="$2"
+if [ -z "$version" ]; then
+echo "error: cpp_doc_push_script.sh: version (arg2) not specified"
  exit 1
 fi

--- a/.circleci/scripts/publish_android_snapshot.sh
+++ b/.circleci/scripts/publish_android_snapshot.sh
@ -5,7 +5,7 @@ set -eu -o pipefail
 export ANDROID_NDK_HOME=/opt/ndk
 export ANDROID_HOME=/opt/android/sdk

-export GRADLE_VERSION=6.8.3
+export GRADLE_VERSION=4.10.3
 export GRADLE_HOME=/opt/gradle/gradle-$GRADLE_VERSION
 export GRADLE_PATH=$GRADLE_HOME/bin/gradle

@ -35,9 +35,7 @@ else
  echo "ndk.dir=/opt/ndk" >> $GRADLE_LOCAL_PROPERTIES

  echo "SONATYPE_NEXUS_USERNAME=${SONATYPE_NEXUS_USERNAME}" >> $GRADLE_PROPERTIES
-  echo "mavenCentralRepositoryUsername=${SONATYPE_NEXUS_USERNAME}" >> $GRADLE_PROPERTIES
  echo "SONATYPE_NEXUS_PASSWORD=${SONATYPE_NEXUS_PASSWORD}" >> $GRADLE_PROPERTIES
-  echo "mavenCentralRepositoryPassword=${SONATYPE_NEXUS_PASSWORD}" >> $GRADLE_PROPERTIES

  echo "signing.keyId=${ANDROID_SIGN_KEY}" >> $GRADLE_PROPERTIES
  echo "signing.password=${ANDROID_SIGN_PASS}" >> $GRADLE_PROPERTIES
--- a/.circleci/scripts/python_doc_push_script.sh
+++ b/.circleci/scripts/python_doc_push_script.sh
@ -13,27 +13,18 @@ echo "python_doc_push_script.sh: Invoked with $*"

 set -ex

-# for statements like ${1:-${DOCS_INSTALL_PATH:-docs/}}
-# the order of operations goes:
-#   1. Check if there's an argument $1
-#   2. If no argument check for environment var DOCS_INSTALL_PATH
-#   3. If no environment var fall back to default 'docs/'
-
-# NOTE: It might seem weird to gather the second argument before gathering the first argument
-#       but since DOCS_INSTALL_PATH can be derived from DOCS_VERSION it's probably better to
-#       try and gather it first, just so we don't potentially break people who rely on this script
-# Argument 2: What version of the docs we are building.
-version="${2:-${DOCS_VERSION:-master}}"
-if [ -z "$version" ]; then
-echo "error: python_doc_push_script.sh: version (arg2) not specified"
+# Argument 1: Where to copy the built documentation to
+# (pytorch.github.io/$install_path)
+install_path="$1"
+if [ -z "$install_path" ]; then
+echo "error: python_doc_push_script.sh: install_path (arg1) not specified"
  exit 1
 fi

-# Argument 1: Where to copy the built documentation to
-# (pytorch.github.io/$install_path)
-install_path="${1:-${DOCS_INSTALL_PATH:-docs/${DOCS_VERSION}}}"
-if [ -z "$install_path" ]; then
-echo "error: python_doc_push_script.sh: install_path (arg1) not specified"
+# Argument 2: What version of the docs we are building.
+version="$2"
+if [ -z "$version" ]; then
+echo "error: python_doc_push_script.sh: version (arg2) not specified"
  exit 1
 fi

@ -43,7 +34,7 @@ if [ "$version" == "master" ]; then
 fi

 # Argument 3: The branch to push to. Usually is "site"
-branch="${3:-${DOCS_BRANCH:-site}}"
+branch="$3"
 if [ -z "$branch" ]; then
 echo "error: python_doc_push_script.sh: branch (arg3) not specified"
  exit 1
--- a/.circleci/scripts/setup_ci_environment.sh
+++ b/.circleci/scripts/setup_ci_environment.sh
@ -7,9 +7,6 @@ sudo rm -f /etc/apt/heroku.list
 sudo rm -f /etc/apt/openjdk-r-ubuntu-ppa-xenial.list
 sudo rm -f /etc/apt/partner.list

-# To increase the network reliability, let apt decide which mirror is best to use
-sudo sed -i -e 's/http:\/\/.*archive/mirror:\/\/mirrors/' -e 's/\/ubuntu\//\/mirrors.txt/' /etc/apt/sources.list
-
 retry () {
  $*  || $* || $* || $* || $*
 }
@ -27,9 +24,7 @@ retry sudo apt-get -y install \
 echo "== DOCKER VERSION =="
 docker version

-if ! command -v aws >/dev/null; then
-  retry sudo pip3 -q install awscli==1.19.64
-fi
+retry sudo pip -q install awscli==1.16.35

 if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then
  DRIVER_FN="NVIDIA-Linux-x86_64-460.39.run"
@ -43,9 +38,9 @@ if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then
  curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
  curl -s -L "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list

-  retry sudo apt-get update -qq
+  sudo apt-get update -qq
  # Necessary to get the `--gpus` flag to function within docker
-  retry sudo apt-get install -y nvidia-container-toolkit
+  sudo apt-get install -y nvidia-container-toolkit
  sudo systemctl restart docker
 else
  # Explicitly remove nvidia docker apt repositories if not building for cuda
@ -53,51 +48,43 @@ else
 fi

 add_to_env_file() {
-  local name=$1
-  local value=$2
-  case "$value" in
-    *\ *)
-      # BASH_ENV should be set by CircleCI
-      echo "${name}='${value}'" >> "${BASH_ENV:-/tmp/env}"
-      ;;
-    *)
-      echo "${name}=${value}" >> "${BASH_ENV:-/tmp/env}"
-      ;;
-  esac
+  local content
+  content=$1
+  # BASH_ENV should be set by CircleCI
+  echo "${content}" >> "${BASH_ENV:-/tmp/env}"
 }

-add_to_env_file IN_CI 1
-add_to_env_file CI_MASTER "${CI_MASTER:-}"
-add_to_env_file COMMIT_SOURCE "${CIRCLE_BRANCH:-}"
-add_to_env_file BUILD_ENVIRONMENT "${BUILD_ENVIRONMENT}"
-add_to_env_file CIRCLE_PULL_REQUEST "${CIRCLE_PULL_REQUEST}"
+add_to_env_file "IN_CI=1"
+add_to_env_file "COMMIT_SOURCE=${CIRCLE_BRANCH:-}"
+add_to_env_file "BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}"
+add_to_env_file "CIRCLE_PULL_REQUEST=${CIRCLE_PULL_REQUEST}"


 if [[ "${BUILD_ENVIRONMENT}" == *-build ]]; then
-  add_to_env_file SCCACHE_BUCKET ossci-compiler-cache-circleci-v2
+  add_to_env_file "SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2"

  SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
  MEMORY_LIMIT_MAX_JOBS=8  # the "large" resource class on CircleCI has 32 CPU cores, if we use all of them we'll OOM
  MAX_JOBS=$(( ${SCCACHE_MAX_JOBS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${SCCACHE_MAX_JOBS} ))
-  add_to_env_file MAX_JOBS "${MAX_JOBS}"
+  add_to_env_file "MAX_JOBS=${MAX_JOBS}"

  if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then
-    add_to_env_file TORCH_CUDA_ARCH_LIST 5.2
+    add_to_env_file "TORCH_CUDA_ARCH_LIST=5.2"
  fi

  if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
    # This IAM user allows write access to S3 bucket for sccache & bazels3cache
    set +x
-    add_to_env_file XLA_CLANG_CACHE_S3_BUCKET_NAME "${XLA_CLANG_CACHE_S3_BUCKET_NAME:-}"
-    add_to_env_file AWS_ACCESS_KEY_ID "${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V2:-}"
-    add_to_env_file AWS_SECRET_ACCESS_KEY "${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V2:-}"
+    add_to_env_file "XLA_CLANG_CACHE_S3_BUCKET_NAME=${XLA_CLANG_CACHE_S3_BUCKET_NAME:-}"
+    add_to_env_file "AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V2:-}"
+    add_to_env_file "AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V2:-}"
    set -x
  else
    # This IAM user allows write access to S3 bucket for sccache
    set +x
-    add_to_env_file XLA_CLANG_CACHE_S3_BUCKET_NAME "${XLA_CLANG_CACHE_S3_BUCKET_NAME:-}"
-    add_to_env_file AWS_ACCESS_KEY_ID "${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}"
-    add_to_env_file AWS_SECRET_ACCESS_KEY "${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}"
+    add_to_env_file "XLA_CLANG_CACHE_S3_BUCKET_NAME=${XLA_CLANG_CACHE_S3_BUCKET_NAME:-}"
+    add_to_env_file "AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}"
+    add_to_env_file "AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}"
    set -x
  fi
 fi
@ -106,7 +93,5 @@ fi
 set +x
 export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_WRITE_V4:-}
 export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_WRITE_V4:-}
-export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-export AWS_REGION=us-east-1
-aws ecr get-login-password --region $AWS_REGION|docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
+eval "$(aws ecr get-login --region us-east-1 --no-include-email)"
 set -x
--- a/.circleci/scripts/trigger_azure_pipeline.py
+++ b/.circleci/scripts/trigger_azure_pipeline.py
@ -1,140 +0,0 @@
-# Documentation: https://docs.microsoft.com/en-us/rest/api/azure/devops/build/?view=azure-devops-rest-6.0
-
-import re
-import json
-import os
-import sys
-import requests
-import time
-
-AZURE_PIPELINE_BASE_URL = "https://aiinfra.visualstudio.com/PyTorch/"
-AZURE_DEVOPS_PAT_BASE64 = os.environ.get("AZURE_DEVOPS_PAT_BASE64_SECRET", "")
-PIPELINE_ID = "911"
-PROJECT_ID = "0628bce4-2d33-499e-bac5-530e12db160f"
-TARGET_BRANCH = os.environ.get("CIRCLE_BRANCH", "master")
-TARGET_COMMIT = os.environ.get("CIRCLE_SHA1", "")
-
-build_base_url = AZURE_PIPELINE_BASE_URL + "_apis/build/builds?api-version=6.0"
-
-s = requests.Session()
-s.headers.update({"Authorization": "Basic " + AZURE_DEVOPS_PAT_BASE64})
-
-def submit_build(pipeline_id, project_id, source_branch, source_version):
-    print("Submitting build for branch: " + source_branch)
-    print("Commit SHA1: ", source_version)
-
-    run_build_raw = s.post(build_base_url, json={
-        "definition": {"id": pipeline_id},
-        "project": {"id": project_id},
-        "sourceBranch": source_branch,
-        "sourceVersion": source_version
-    })
-
-    try:
-        run_build_json = run_build_raw.json()
-    except json.decoder.JSONDecodeError as e:
-        print(e)
-        print("Failed to parse the response. Check if the Azure DevOps PAT is incorrect or expired.")
-        sys.exit(-1)
-
-    build_id = run_build_json['id']
-
-    print("Submitted bulid: " + str(build_id))
-    print("Bulid URL: " + run_build_json['url'])
-    return build_id
-
-def get_build(_id):
-    get_build_url = AZURE_PIPELINE_BASE_URL + f"/_apis/build/builds/{_id}?api-version=6.0"
-    get_build_raw = s.get(get_build_url)
-    return get_build_raw.json()
-
-def get_build_logs(_id):
-    get_build_logs_url = AZURE_PIPELINE_BASE_URL + f"/_apis/build/builds/{_id}/logs?api-version=6.0"
-    get_build_logs_raw = s.get(get_build_logs_url)
-    return get_build_logs_raw.json()
-
-def get_log_content(url):
-    resp = s.get(url)
-    return resp.text
-
-def wait_for_build(_id):
-    build_detail = get_build(_id)
-    build_status = build_detail['status']
-
-    while build_status == 'notStarted':
-        print('Waiting for run to start: ' + str(_id))
-        sys.stdout.flush()
-        try:
-            build_detail = get_build(_id)
-            build_status = build_detail['status']
-        except Exception as e:
-            print("Error getting build")
-            print(e)
-
-        time.sleep(30)
-
-    print("Bulid started: ", str(_id))
-
-    handled_logs = set()
-    while build_status == 'inProgress':
-        try:
-            print("Waiting for log: " + str(_id))
-            logs = get_build_logs(_id)
-        except Exception as e:
-            print("Error fetching logs")
-            print(e)
-            time.sleep(30)
-            continue
-
-        for log in logs['value']:
-            log_id = log['id']
-            if log_id in handled_logs:
-                continue
-            handled_logs.add(log_id)
-            print('Fetching log: \n' + log['url'])
-            try:
-                log_content = get_log_content(log['url'])
-                print(log_content)
-            except Exception as e:
-                print("Error getting log content")
-                print(e)
-            sys.stdout.flush()
-        build_detail = get_build(_id)
-        build_status = build_detail['status']
-        time.sleep(30)
-
-    build_result = build_detail['result']
-
-    print("Bulid status: " + build_status)
-    print("Bulid result: " + build_result)
-
-    return build_status, build_result
-
-if __name__ == '__main__':
-    # Convert the branch name for Azure DevOps
-    match = re.search(r'pull/(\d+)', TARGET_BRANCH)
-    if match is not None:
-        pr_num = match.group(1)
-        SOURCE_BRANCH = f'refs/pull/{pr_num}/head'
-    else:
-        SOURCE_BRANCH = f'refs/heads/{TARGET_BRANCH}'
-
-    MAX_RETRY = 2
-    retry = MAX_RETRY
-
-    while retry > 0:
-        build_id = submit_build(PIPELINE_ID, PROJECT_ID, SOURCE_BRANCH, TARGET_COMMIT)
-        build_status, build_result = wait_for_build(build_id)
-
-        if build_result != 'succeeded':
-            retry = retry - 1
-            if retry > 0:
-                print("Retrying... remaining attempt: " + str(retry))
-                # Wait a bit before retrying
-                time.sleep((MAX_RETRY - retry) * 120)
-                continue
-            else:
-                print("No more chance to retry. Giving up.")
-                sys.exit(-1)
-        else:
-            break
--- a/.circleci/scripts/upload_binary_size_to_scuba.py
+++ b/.circleci/scripts/upload_binary_size_to_scuba.py
@ -9,40 +9,28 @@ import sys
 import time
 import zipfile

-from typing import Any, Dict, Generator, List
-from tools.stats.scribe import (
-    send_to_scribe,
-    rds_write,
-    register_rds_schema,
-    schema_from_sample,
-)
+import requests


-def get_size(file_dir: str) -> int:
+def get_size(file_dir):
    try:
        # we should only expect one file, if no, something is wrong
        file_name = glob.glob(os.path.join(file_dir, "*"))[0]
        return os.stat(file_name).st_size
-    except Exception:
+    except:
        logging.exception(f"error getting file from: {file_dir}")
        return 0


-def base_data() -> Dict[str, Any]:
-    return {
-        "run_duration_seconds": int(
-            time.time() - os.path.getmtime(os.path.realpath(__file__))
-        ),
-    }
-
-
-def build_message(size: int) -> Dict[str, Any]:
-    build_env_split: List[Any] = os.environ.get("BUILD_ENVIRONMENT", "").split()
-    pkg_type, py_ver, cu_ver, *_ = build_env_split + [None, None, None]
+def build_message(size):
+    pkg_type, py_ver, cu_ver, *_ = os.environ.get("BUILD_ENVIRONMENT", "").split() + [
+        None,
+        None,
+        None,
+    ]
    os_name = os.uname()[0].lower()
    if os_name == "darwin":
        os_name = "macos"
-
    return {
        "normal": {
            "os": os_name,
@ -59,30 +47,38 @@ def build_message(size: int) -> Dict[str, Any]:
            "time": int(time.time()),
            "size": size,
            "commit_time": int(os.environ.get("COMMIT_TIME", "0")),
-            "run_duration": int(
-                time.time() - os.path.getmtime(os.path.realpath(__file__))
-            ),
+            "run_duration": int(time.time() - os.path.getmtime(os.path.realpath(__file__))),
        },
    }


-def send_message(messages: List[Dict[str, Any]]) -> None:
-    logs = json.dumps(
-        [
-            {
-                "category": "perfpipe_pytorch_binary_size",
-                "message": json.dumps(message),
-                "line_escape": False,
-            }
-            for message in messages
-        ]
+def send_message(messages):
+    access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN")
+    if not access_token:
+        raise ValueError("Can't find access token from environment variable")
+    url = "https://graph.facebook.com/scribe_logs"
+    r = requests.post(
+        url,
+        data={
+            "access_token": access_token,
+            "logs": json.dumps(
+                [
+                    {
+                        "category": "perfpipe_pytorch_binary_size",
+                        "message": json.dumps(message),
+                        "line_escape": False,
+                    }
+                    for message in messages
+                ]
+            ),
+        },
    )
-    res = send_to_scribe(logs)
-    print(res)
+    print(r.text)
+    r.raise_for_status()


-def report_android_sizes(file_dir: str) -> None:
-    def gen_sizes() -> Generator[List[Any], None, None]:
+def report_android_sizes(file_dir):
+    def gen_sizes():
        # we should only expect one file, if no, something is wrong
        aar_files = list(pathlib.Path(file_dir).rglob("pytorch_android-*.aar"))
        if len(aar_files) != 1:
@ -105,7 +101,7 @@ def report_android_sizes(file_dir: str) -> None:
        # report whole package size
        yield ["aar", aar_file.name, os.stat(aar_file).st_size, 0]

-    def gen_messages() -> Generator[Dict[str, Any], None, None]:
+    def gen_messages():
        android_build_type = os.environ.get("ANDROID_BUILD_TYPE")
        for arch, lib, comp_size, uncomp_size in gen_sizes():
            print(android_build_type, arch, lib, comp_size, uncomp_size)
@ -125,9 +121,7 @@ def report_android_sizes(file_dir: str) -> None:
                "int": {
                    "time": int(time.time()),
                    "commit_time": int(os.environ.get("COMMIT_TIME", "0")),
-                    "run_duration": int(
-                        time.time() - os.path.getmtime(os.path.realpath(__file__))
-                    ),
+                    "run_duration": int(time.time() - os.path.getmtime(os.path.realpath(__file__))),
                    "size": comp_size,
                    "raw_size": uncomp_size,
                },
@ -142,42 +136,14 @@ if __name__ == "__main__":
    )
    if len(sys.argv) == 2:
        file_dir = sys.argv[1]
-
-    if os.getenv("IS_GHA", "0") == "1":
-        sample_lib = {
-            "library": "abcd",
-            "size": 1234,
-        }
-        sample_data = {
-            **base_data(),
-            **sample_lib,
-        }
-        register_rds_schema("binary_size", schema_from_sample(sample_data))
+    print("checking dir: " + file_dir)

    if "-android" in os.environ.get("BUILD_ENVIRONMENT", ""):
        report_android_sizes(file_dir)
    else:
-        if os.getenv("IS_GHA", "0") == "1":
-            build_path = pathlib.Path("build") / "lib"
-            libraries = [
-                (path.name, os.stat(path).st_size) for path in build_path.glob("*")
-            ]
-            data = []
-            for name, size in libraries:
-                if name.strip() == "":
-                    continue
-                library_data = {
-                    "library": name,
-                    "size": size,
-                }
-                data.append({**base_data(), **library_data})
-            rds_write("binary_size", data)
-            print(json.dumps(data, indent=2))
-        else:
-            print("checking dir: " + file_dir)
-            size = get_size(file_dir)
-            # Sending the message anyway if no size info is collected.
+        size = get_size(file_dir)
+        if size != 0:
            try:
                send_message([build_message(size)])
-            except Exception:
+            except:
                logging.exception("can't send message")
--- a/.circleci/scripts/vs_install.ps1
+++ b/.circleci/scripts/vs_install.ps1
@ -1,10 +1,7 @@
-# https://developercommunity.visualstudio.com/t/install-specific-version-of-vs-component/1142479
-# Where to find the links: https://docs.microsoft.com/en-us/visualstudio/releases/2019/history#release-dates-and-build-numbers
-
-# BuildTools from S3
-$VS_DOWNLOAD_LINK = "https://s3.amazonaws.com/ossci-windows/vs${env:VS_VERSION}_BuildTools.exe"
+$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe"
 $COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
 $VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
+                                                     "--add Microsoft.VisualStudio.Component.VC.Tools.14.13",
                                                     "--add Microsoft.Component.MSBuild",
                                                     "--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
                                                     "--add Microsoft.VisualStudio.Component.TextTemplating",
@ -14,45 +11,17 @@ $VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStud
                                                     "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
                                                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")

-if (${env:INSTALL_WINDOWS_SDK} -eq "1") {
-    $VS_INSTALL_ARGS += "--add Microsoft.VisualStudio.Component.Windows10SDK.19041"
-}
-
-if (Test-Path "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe") {
-    $VS_VERSION_major = [int] ${env:VS_VERSION}.split(".")[0]
-    $existingPath = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -version "[${env:VS_VERSION}, ${env:VS_VERSION_major + 1})" -property installationPath
-    if (($existingPath -ne $null) -and (!${env:CIRCLECI})) {
-        echo "Found correctly versioned existing BuildTools installation in $existingPath"
-        exit 0
-    }
-    $pathToRemove = & "${env:ProgramFiles(x86)}\Microsoft Visual Studio\Installer\vswhere.exe" -products "Microsoft.VisualStudio.Product.BuildTools" -property installationPath
-}
-
-echo "Downloading VS installer from S3."
 curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
 if ($LASTEXITCODE -ne 0) {
-    echo "Download of the VS 2019 Version ${env:VS_VERSION} installer failed"
+    echo "Download of the VS 2017 installer failed"
    exit 1
 }

-if ($pathToRemove -ne $null) {
-    echo "Uninstalling $pathToRemove."
-    $VS_UNINSTALL_ARGS = @("uninstall", "--installPath", "`"$pathToRemove`"", "--quiet","--wait")
-    $process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_UNINSTALL_ARGS -NoNewWindow -Wait -PassThru
-    $exitCode = $process.ExitCode
-    if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
-        echo "Original BuildTools uninstall failed with code $exitCode"
-        exit 1
-    }
-    echo "Other versioned BuildTools uninstalled."
-}
-
-echo "Installing Visual Studio version ${env:VS_VERSION}."
 $process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
 Remove-Item -Path vs_installer.exe -Force
 $exitCode = $process.ExitCode
 if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
-    echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]."
+    echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]."
    curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
    if ($LASTEXITCODE -ne 0) {
        echo "Download of the VS Collect tool failed."
@ -60,6 +29,6 @@ if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
    }
    Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
    New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
-    Copy-Item -Path "${env:TEMP}\vslogs.zip" -Destination "C:\w\build-results\"
+    Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\"
    exit 1
 }
--- a/.circleci/scripts/windows_cuda_install.sh
+++ b/.circleci/scripts/windows_cuda_install.sh
@ -1,74 +1,61 @@
 #!/bin/bash
 set -eux -o pipefail

-case ${CUDA_VERSION} in
-    10.1)
-        cuda_installer_name="cuda_10.1.243_426.00_win10"
-        cuda_install_packages="nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
-        ;;
-    10.2)
-        cuda_installer_name="cuda_10.2.89_441.22_win10"
-        cuda_install_packages="nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2"
-        ;;
-    11.1)
-        cuda_installer_name="cuda_11.1.0_456.43_win10"
-        cuda_install_packages="nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
-        ;;
-    11.3)
-        cuda_installer_name="cuda_11.3.0_465.89_win10"
-        cuda_install_packages="thrust_11.3 nvcc_11.3 cuobjdump_11.3 nvprune_11.3 nvprof_11.3 cupti_11.3 cublas_11.3 cublas_dev_11.3 cudart_11.3 cufft_11.3 cufft_dev_11.3 curand_11.3 curand_dev_11.3 cusolver_11.3 cusolver_dev_11.3 cusparse_11.3 cusparse_dev_11.3 npp_11.3 npp_dev_11.3 nvrtc_11.3 nvrtc_dev_11.3 nvml_dev_11.3"
-        ;;
-    *)
-        echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
-        exit 1
-        ;;
-esac
+cuda_major_version=${CUDA_VERSION%.*}

-
-if [[ -f "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/bin/nvcc.exe" ]]; then
-    echo "Existing CUDA v${CUDA_VERSION} installation found, skipping install"
+if [[ "$cuda_major_version" == "10" ]]; then
+    cuda_installer_name="cuda_10.1.243_426.00_win10"
+    msbuild_project_dir="CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
+    cuda_install_packages="nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
+elif [[ "$cuda_major_version" == "11" ]]; then
+    cuda_installer_name="cuda_11.1.0_456.43_win10"
+    msbuild_project_dir="visual_studio_integration/CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
+    cuda_install_packages="nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
 else
-    tmp_dir=$(mktemp -d)
-    (
-        # no need to popd after, the subshell shouldn't affect the parent shell
-        pushd "${tmp_dir}"
-        cuda_installer_link="https://ossci-windows.s3.amazonaws.com/${cuda_installer_name}.exe"
-
-        curl --retry 3 -kLO $cuda_installer_link
-        7z x ${cuda_installer_name}.exe -o${cuda_installer_name}
-        pushd ${cuda_installer_name}
-        mkdir cuda_install_logs
-
-        set +e
-
-        # This breaks for some reason if you quote cuda_install_packages
-        # shellcheck disable=SC2086
-        ./setup.exe -s ${cuda_install_packages} -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"
-
-        set -e
-
-        if [[ ! -f "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/bin/nvcc.exe" ]]; then
-            echo "CUDA installation failed"
-            mkdir -p /c/w/build-results
-            7z a "c:\\w\\build-results\\cuda_install_logs.7z" cuda_install_logs
-            exit 1
-        fi
-    )
-    rm -rf "${tmp_dir}"
+    echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
+    exit 1
 fi

-if [[ -f "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64/nvToolsExt64_1.dll" ]]; then
-    echo "Existing nvtools installation found, skipping install"
-else
-    # create tmp dir for download
-    tmp_dir=$(mktemp -d)
-    (
-        # no need to popd after, the subshell shouldn't affect the parent shell
-        pushd "${tmp_dir}"
-        curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
-        7z x NvToolsExt.7z -oNvToolsExt
-        mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
-        cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
-    )
-    rm -rf "${tmp_dir}"
+if [[ "$cuda_major_version" == "11" && "${JOB_EXECUTOR}" == "windows-with-nvidia-gpu" ]]; then
+    cuda_install_packages="${cuda_install_packages} Display.Driver"
 fi
+
+cuda_installer_link="https://ossci-windows.s3.amazonaws.com/${cuda_installer_name}.exe"
+
+curl --retry 3 -kLO $cuda_installer_link
+7z x ${cuda_installer_name}.exe -o${cuda_installer_name}
+cd ${cuda_installer_name}
+mkdir cuda_install_logs
+
+set +e
+
+./setup.exe -s ${cuda_install_packages} -loglevel:6 -log:"$(pwd -W)/cuda_install_logs"
+
+set -e
+
+if [[ "${VC_YEAR}" == "2017" ]]; then
+    cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2017/${VC_PRODUCT}/Common7/IDE/VC/VCTargets/BuildCustomizations/"
+else
+    cp -r ${msbuild_project_dir}/* "C:/Program Files (x86)/Microsoft Visual Studio/2019/${VC_PRODUCT}/MSBuild/Microsoft/VC/v160/BuildCustomizations/"
+fi
+
+if ! ls "/c/Program Files/NVIDIA Corporation/NvToolsExt/bin/x64/nvToolsExt64_1.dll"
+then
+    curl --retry 3 -kLO https://ossci-windows.s3.amazonaws.com/NvToolsExt.7z
+    7z x NvToolsExt.7z -oNvToolsExt
+    mkdir -p "C:/Program Files/NVIDIA Corporation/NvToolsExt"
+    cp -r NvToolsExt/* "C:/Program Files/NVIDIA Corporation/NvToolsExt/"
+    export NVTOOLSEXT_PATH="C:\\Program Files\\NVIDIA Corporation\\NvToolsExt\\"
+fi
+
+if ! ls "/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/bin/nvcc.exe"
+then
+    echo "CUDA installation failed"
+    mkdir -p /c/w/build-results
+    7z a "c:\\w\\build-results\\cuda_install_logs.7z" cuda_install_logs
+    exit 1
+fi
+
+cd ..
+rm -rf ./${cuda_installer_name}
+rm -f ./${cuda_installer_name}.exe
--- a/.circleci/scripts/windows_cudnn_install.sh
+++ b/.circleci/scripts/windows_cudnn_install.sh
@ -1,46 +1,24 @
 #!/bin/bash
 set -eux -o pipefail

-# This is typically blank but for CUDA 10* it'll be set to 10
-windows_version_qualifier=""
+# Drop the last dot-separated component of CUDA_VERSION (e.g. 11.1 -> 11).
+cuda_major_version=${CUDA_VERSION%.*}

-case ${CUDA_VERSION} in
-    10.1)
-        archive_version="v7.6.4.38"
-        windows_version_qualifier="10"
-        ;;
-    10.2)
-        archive_version="v7.6.5.32"
-        windows_version_qualifier="10"
-        ;;
-    11.1)
-        archive_version="v8.0.5.39"
-        ;;
-    11.3)
-        archive_version="v8.2.0.53"
-        ;;
-    *)
-        echo "CUDA_VERSION: ${CUDA_VERSION} not supported yet"
-        exit 1
-        ;;
-esac
-
-cudnn_installer_name="cudnn_installer.zip"
-cudnn_installer_link="https://ossci-windows.s3.amazonaws.com/cudnn-${CUDA_VERSION}-windows${windows_version_qualifier}-x64-${archive_version}.zip"
-cudnn_install_folder="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/"
-
-if [[ -f "${cudnn_install_folder}/include/cudnn.h" ]]; then
-    echo "Existing cudnn installation found, skipping install..."
+if [[ "$cuda_major_version" == "10" ]]; then
+    cudnn_installer_name="cudnn-${CUDA_VERSION}-windows10-x64-v7.6.4.38"
+elif [[ "$cuda_major_version" == "11" ]]; then
+    cudnn_installer_name="cudnn-${CUDA_VERSION}-windows-x64-v8.0.5.39"
 else
-    tmp_dir=$(mktemp -d)
-    (
-        pushd "${tmp_dir}"
-        curl --retry 3 -o "${cudnn_installer_name}" "$cudnn_installer_link"
-        7z x "${cudnn_installer_name}" -ocudnn
-        # Use '${var:?}/*' to avoid potentially expanding to '/*'
-        # Remove all of the directories before attempting to copy files
-        rm -rf "${cudnn_install_folder:?}/*"
-        cp -rf cudnn/cuda/* "${cudnn_install_folder}"
-    )
-    rm -rf "${tmp_dir}"
+    echo "CUDNN for CUDA_VERSION $CUDA_VERSION is not supported yet"
+    exit 1
 fi
+
+cudnn_installer_link="https://ossci-windows.s3.amazonaws.com/${cudnn_installer_name}.zip"
+
+# Download the archive, unpack it into ./cudnn, and copy its cuda/ tree into the CUDA toolkit directory.
+# Expansions are quoted so the URL and file names are never word-split or glob-expanded.
+curl --retry 3 -O "$cudnn_installer_link"
+7z x "${cudnn_installer_name}.zip" -ocudnn
+cp -r cudnn/cuda/* "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_VERSION}/"
+rm -rf cudnn
+rm -f "${cudnn_installer_name}.zip"
--- a/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
+++ b/.circleci/verbatim-sources/build-parameters/pytorch-build-params.yml
@ -15,35 +15,13 @@ pytorch_params: &pytorch_params
    build_only:
      type: string
      default: ""
-    ci_master:
-      type: string
-      default: ""
  environment:
    BUILD_ENVIRONMENT: << parameters.build_environment >>
    DOCKER_IMAGE: << parameters.docker_image >>
    USE_CUDA_DOCKER_RUNTIME: << parameters.use_cuda_docker_runtime >>
    BUILD_ONLY: << parameters.build_only >>
-    CI_MASTER: << pipeline.parameters.run_master_build >>
  resource_class: << parameters.resource_class >>

-pytorch_android_params: &pytorch_android_params
-  parameters:
-    build_environment:
-      type: string
-      default: ""
-    op_list:
-      type: string
-      default: ""
-    lite_interpreter:
-      type: string
-      default: "1"
-  environment:
-    BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single
-    DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
-    PYTHON_VERSION: "3.6"
-    SELECTED_OP_LIST: << parameters.op_list >>
-    BUILD_LITE_INTERPRETER: << parameters.lite_interpreter >>
-
 pytorch_ios_params: &pytorch_ios_params
  parameters:
    build_environment:
@ -61,20 +39,12 @@ pytorch_ios_params: &pytorch_ios_params
    use_metal:
      type: string
      default: "0"
-    lite_interpreter:
-      type: string
-      default: "1"
-    use_coreml:
-      type: string
-      default: "0"
  environment:
    BUILD_ENVIRONMENT: << parameters.build_environment >>
    IOS_ARCH: << parameters.ios_arch >>
    IOS_PLATFORM: << parameters.ios_platform >>
    SELECTED_OP_LIST: << parameters.op_list >>
    USE_PYTORCH_METAL: << parameters.use_metal >>
-    BUILD_LITE_INTERPRETER: << parameters.lite_interpreter >>
-    USE_COREML_DELEGATE: << parameters.use_coreml >>

 pytorch_windows_params: &pytorch_windows_params
  parameters:
@ -92,10 +62,7 @@ pytorch_windows_params: &pytorch_windows_params
      default: "10.1"
    python_version:
      type: string
-      default: "3.8"
-    vs_version:
-      type: string
-      default: "16.8.6"
+      default: "3.6"
    vc_version:
      type: string
      default: "14.16"
@ -113,11 +80,10 @@ pytorch_windows_params: &pytorch_windows_params
    SCCACHE_BUCKET: "ossci-compiler-cache"
    CUDA_VERSION: <<parameters.cuda_version>>
    PYTHON_VERSION: <<parameters.python_version>>
-    VS_VERSION: <<parameters.vs_version>>
    VC_VERSION: <<parameters.vc_version>>
    VC_YEAR: <<parameters.vc_year>>
    VC_PRODUCT: <<parameters.vc_product>>
    USE_CUDA: <<parameters.use_cuda>>
-    TORCH_CUDA_ARCH_LIST: "5.2 7.5"
+    TORCH_CUDA_ARCH_LIST: "7.5"
    JOB_BASE_NAME: <<parameters.test_name>>
    JOB_EXECUTOR: <<parameters.executor>>
--- a/.circleci/verbatim-sources/commands.yml
+++ b/.circleci/verbatim-sources/commands.yml
@ -111,11 +111,11 @@ commands:
                git config --global user.email "circleci.ossci@gmail.com"
                git config --global user.name "CircleCI"
                git config remote.origin.url https://github.com/pytorch/pytorch.git
-                git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
-                git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
+                git config --add remote.origin.fetch +refs/heads/release/1.8:refs/remotes/origin/release/1.8
+                git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/release/1.8:refs/remotes/origin/release/1.8 --depth=100 --quiet
                # PRs generated from ghstack has format CIRCLE_PR_BASE_BRANCH=gh/xxx/1234/base
                if [[ "${CIRCLE_PR_BASE_BRANCH}" == "gh/"* ]]; then
-                  CIRCLE_PR_BASE_BRANCH=master
+                  CIRCLE_PR_BASE_BRANCH=release/1.8
                fi
                export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/$CIRCLE_PR_BASE_BRANCH`
                echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
@ -171,4 +171,4 @@ commands:
            cd ~/project
            export ANDROID_BUILD_TYPE="<< parameters.build_type >>"
            export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
-            python3 -m tools.stats.upload_binary_size_to_scuba android
+            python3 .circleci/scripts/upload_binary_size_to_scuba.py android
--- a/.circleci/verbatim-sources/header-section.yml
+++ b/.circleci/verbatim-sources/header-section.yml
@ -14,18 +14,19 @@ parameters:
  run_build:
    type: boolean
    default: true
-  run_master_build:
-    type: boolean
-    default: false
-  run_slow_gradcheck_build:
-    type: boolean
-    default: false
+
+docker_config_defaults: &docker_config_defaults
+  user: jenkins
+  aws_auth:
+    # This IAM user only allows read-write access to ECR
+    aws_access_key_id: ${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_WRITE_V4}
+    aws_secret_access_key: ${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_WRITE_V4}

 executors:
  windows-with-nvidia-gpu:
    machine:
      resource_class: windows.gpu.nvidia.medium
-      image: windows-server-2019-nvidia:previous
+      image: windows-server-2019-nvidia:stable
      shell: bash.exe

  windows-xlarge-cpu-with-nvidia-cuda:
--- a/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/binary-job-specs.yml
@ -22,14 +22,14 @@
        command: |
            ls -lah /final_pkgs
    - run:
-        name: upload build & binary data
+        name: save binary size
        no_output_timeout: "5m"
        command: |
            source /env
            cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
            python3 -mpip install requests && \
            SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
-            python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
+            python3 /pytorch/.circleci/scripts/upload_binary_size_to_scuba.py || exit 0
    - persist_to_workspace:
        root: /
        paths: final_pkgs
@ -45,7 +45,7 @@
  binary_linux_test:
    <<: *binary_linux_test_upload_params
    machine:
-        image: ubuntu-2004:202104-01
+        image: ubuntu-1604:202007-01
    steps:
    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
    - checkout
@ -108,7 +108,7 @@
  smoke_linux_test:
    <<: *binary_linux_test_upload_params
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -239,7 +239,7 @@
  binary_ios_build:
    <<: *pytorch_ios_params
    macos:
-      xcode: "12.5.1"
+      xcode: "12.0"
    steps:
    - attach_workspace:
        at: ~/workspace
@ -266,7 +266,7 @@
  binary_ios_upload:
    <<: *pytorch_ios_params
    macos:
-      xcode: "12.5.1"
+      xcode: "12.0"
    steps:
    - attach_workspace:
        at: ~/workspace
@ -293,6 +293,11 @@
    steps:
    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
    - checkout
+    - run:
+        name: _HACK_ Install CUDA compatible cmath
+        no_output_timeout: 1m
+        command: |
+            powershell .circleci/scripts/vs_install_cmath.ps1
    - run:
        <<: *binary_checkout
    - run:
@ -308,8 +313,6 @@
    - persist_to_workspace:
        root: "C:/w"
        paths: final_pkgs
-    - store_artifacts:
-        path: C:/w/final_pkgs

  binary_windows_test:
    <<: *binary_windows_params
@ -392,3 +395,4 @@
          command: |
              ANACONDA_API_TOKEN="${CONDA_PYTORCHBOT_TOKEN}" \
              scripts/release/anaconda-prune/run.sh
+
--- a/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml
+++ b/.circleci/verbatim-sources/job-specs/binary_update_htmls.yml
@ -8,7 +8,7 @@
  # then install the one with the most recent version.
  update_s3_htmls: &update_s3_htmls
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    resource_class: medium
    steps:
    - checkout
--- a/.circleci/verbatim-sources/job-specs/docker_jobs.yml
+++ b/.circleci/verbatim-sources/job-specs/docker_jobs.yml
@ -4,7 +4,7 @@
          type: string
          default: ""
      machine:
-        image: ubuntu-2004:202104-01
+        image: ubuntu-1604:202007-01
      resource_class: large
      environment:
        IMAGE_NAME: << parameters.image_name >>
@ -20,10 +20,7 @@
              set +x
              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
-              export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-              export AWS_REGION=us-east-1
-              aws ecr get-login-password --region $AWS_REGION|docker login --username AWS \
-                       --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
+              eval $(aws ecr get-login --no-include-email --region us-east-1)
              set -x
              # Check if image already exists, if it does then skip building it
              if docker manifest inspect "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/${IMAGE_NAME}:${DOCKER_TAG}"; then
@ -56,7 +53,7 @@
              cd .circleci/docker && ./build_docker.sh
  docker_for_ecr_gc_build_job:
      machine:
-        image: ubuntu-2004:202104-01
+        image: ubuntu-1604:202007-01
      steps:
        - checkout
        - run:
@ -68,12 +65,9 @@
              set +x
              export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_DOCKER_BUILDER_V1}
              export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_DOCKER_BUILDER_V1}
-              export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\")
-              export AWS_REGION=us-east-1
-              aws ecr get-login-password --region $AWS_REGION|docker login --username AWS \
-                       --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com
+              eval $(aws ecr get-login --no-include-email --region us-east-1)
              set -x
-              docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/gc/ecr
+              docker push 308535385114.dkr.ecr.us-east-1.amazonaws.com/gc/ecr
  ecr_gc_job:
      parameters:
        project:
--- a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
@ -1,7 +1,7 @@
  pytorch_doc_push:
    resource_class: medium
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    parameters:
      branch:
        type: string
@ -30,7 +30,7 @@
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: large
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -41,10 +41,9 @@
        no_output_timeout: "1h"
        command: |
          set -ex
-          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
-          # turn v1.12.0rc3 into 1.12.0
-          tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9.]*\).*/\1/')
+          tag=${CIRCLE_TAG:1:5}
          target=${tag:-master}
          echo "building for ${target}"
          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
@ -76,7 +75,7 @@
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: large
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -87,10 +86,8 @@
        no_output_timeout: "1h"
        command: |
          set -ex
-          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
-          # turn v1.12.0rc3 into 1.12.0
-          tag=$(echo $CIRCLE_TAG | sed -e 's/v*\([0-9.]*\).*/\1/')
          tag=${CIRCLE_TAG:1:5}
          target=${tag:-master}
          echo "building for ${target}"
@ -114,44 +111,6 @@
        paths:
          - .

-  pytorch_macos_10_15_py3_build:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-macos-10.15-py3-arm64-build
-    macos:
-      xcode: "12.3.0"
-    steps:
-      - checkout
-      - run_brew_for_macos_build
-      - run:
-          name: Build
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            export IN_CI=1
-            export CROSS_COMPILE_ARM64=1
-            export JOB_BASE_NAME=$CIRCLE_JOB
-
-            # Install sccache
-            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
-            sudo chmod +x /usr/local/bin/sccache
-            export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
-
-            # This IAM user allows write access to S3 bucket for sccache
-            set +x
-            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            set -x
-
-            chmod a+x .jenkins/pytorch/macos-build.sh
-            unbuffer .jenkins/pytorch/macos-build.sh 2>&1 | ts
-
-      - persist_to_workspace:
-          root: /Users/distiller/workspace/
-          paths:
-            - miniconda3
-      - store_artifacts:
-          path: /Users/distiller/project/dist
-
  pytorch_macos_10_13_py3_build:
    environment:
      BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-build
@ -166,10 +125,9 @@
          command: |
            set -e
            export IN_CI=1
-            export JOB_BASE_NAME=$CIRCLE_JOB

            # Install sccache
-            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache_v2.15 --output /usr/local/bin/sccache
+            sudo curl --retry 3 https://s3.amazonaws.com/ossci-macos/sccache --output /usr/local/bin/sccache
            sudo chmod +x /usr/local/bin/sccache
            export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2

@ -203,49 +161,9 @@
          command: |
            set -e
            export IN_CI=1
-            export JOB_BASE_NAME=$CIRCLE_JOB

            chmod a+x .jenkins/pytorch/macos-test.sh
            unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
-      - run:
-          name: Report results
-          no_output_timeout: "5m"
-          command: |
-            set -ex
-            source /Users/distiller/workspace/miniconda3/bin/activate
-            pip install boto3
-
-            export IN_CI=1
-            export JOB_BASE_NAME=$CIRCLE_JOB
-
-            # Using the same IAM user to write stats to our OSS bucket
-            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
-          when: always
-      - store_test_results:
-          path: test/test-reports
-
-  pytorch_macos_10_13_py3_lite_interpreter_build_test:
-    environment:
-      BUILD_ENVIRONMENT: pytorch-macos-10.13-py3-test
-    macos:
-      xcode: "12.0"
-    steps:
-      - checkout
-      - attach_workspace:
-          at: ~/workspace
-      - run_brew_for_macos_build
-      - run:
-          name: Test
-          no_output_timeout: "1h"
-          command: |
-            set -e
-            export IN_CI=1
-            export BUILD_LITE_INTERPRETER=1
-            export JOB_BASE_NAME=$CIRCLE_JOB
-            chmod a+x ${HOME}/project/.jenkins/pytorch/macos-lite-interpreter-build-test.sh
-            unbuffer ${HOME}/project/.jenkins/pytorch/macos-lite-interpreter-build-test.sh 2>&1 | ts
      - store_test_results:
          path: test/test-reports

@ -256,7 +174,7 @@
      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -267,7 +185,7 @@
        no_output_timeout: "1h"
        command: |
          set -eux
-          docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+          docker_image_commit=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}

          docker_image_libtorch_android_x86_32=${docker_image_commit}-android-x86_32
          docker_image_libtorch_android_x86_64=${docker_image_commit}-android-x86_64
@ -345,7 +263,7 @@
      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -356,7 +274,7 @@
        no_output_timeout: "1h"
        command: |
          set -eux
-          docker_image_commit=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+          docker_image_commit=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}

          docker_image_libtorch_android_x86_32_gradle=${docker_image_commit}-android-x86_32-gradle

@ -381,7 +299,7 @@
      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -393,7 +311,7 @@
        no_output_timeout: "1h"
        command: |
          set -e
-          docker_image_libtorch_android_x86_32=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}-android-x86_32
+          docker_image_libtorch_android_x86_32=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}-android-x86_32
          echo "docker_image_libtorch_android_x86_32: "${docker_image_libtorch_android_x86_32}

          # x86
@ -417,10 +335,13 @@
        destination: artifacts.tgz

  pytorch_android_gradle_custom_build_single:
-    <<: *pytorch_android_params
+    environment:
+      BUILD_ENVIRONMENT: pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-custom-build-single
+      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
+      PYTHON_VERSION: "3.6"
    resource_class: large
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -440,11 +361,11 @@
          echo "DOCKER_IMAGE: ${DOCKER_IMAGE}:${DOCKER_TAG}"
          time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null

-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
+          git submodule sync && git submodule update -q --init --recursive
          VOLUME_MOUNTS="-v /home/circleci/project/:/var/lib/jenkins/workspace"
          export id=$(docker run --env-file "${BASH_ENV}" ${VOLUME_MOUNTS} --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})

-          export COMMAND='((echo "export GRADLE_OFFLINE=1" && echo "export BUILD_LITE_INTERPRETER=${BUILD_LITE_INTERPRETER}" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          export COMMAND='((echo "export GRADLE_OFFLINE=1" && echo "sudo chown -R jenkins workspace && cd workspace && ./.circleci/scripts/build_android_gradle.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts

          # Skip docker push as this job is purely for size analysis purpose.
@ -456,7 +377,7 @@
  pytorch_ios_build:
    <<: *pytorch_ios_params
    macos:
-      xcode: "12.5.1"
+      xcode: "12.0"
    steps:
      - checkout
      - run_brew_for_ios_build
@ -470,17 +391,16 @@
            # install fastlane
            sudo gem install bundler && bundle install
            # install certificates
-            echo ${IOS_CERT_KEY_2022} >> cert.txt
+            echo ${IOS_CERT_KEY} >> cert.txt
            base64 --decode cert.txt -o Certificates.p12
            rm cert.txt
-            bundle exec fastlane install_root_cert
-            bundle exec fastlane install_dev_cert
+            bundle exec fastlane install_cert
            # install the provisioning profile
-            PROFILE=PyTorch_CI_2022.mobileprovision
+            PROFILE=PyTorch_CI_2021.mobileprovision
            PROVISIONING_PROFILES=~/Library/MobileDevice/Provisioning\ Profiles
            mkdir -pv "${PROVISIONING_PROFILES}"
            cd "${PROVISIONING_PROFILES}"
-            echo ${IOS_SIGN_KEY_2022} >> cert.txt
+            echo ${IOS_SIGN_KEY} >> cert.txt
            base64 --decode cert.txt -o ${PROFILE}
            rm cert.txt
      - run:
@ -510,7 +430,7 @@
            # sync submodules
            cd ${PROJ_ROOT}
            git submodule sync
-            git submodule update --init --recursive --depth 1 --jobs 0
+            git submodule update --init --recursive

            # export
            export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
@ -520,7 +440,6 @@
            echo "IOS_ARCH: ${IOS_ARCH}"
            echo "IOS_PLATFORM: ${IOS_PLATFORM}"
            echo "USE_PYTORCH_METAL": "${USE_METAL}"
-            echo "BUILD_LITE_INTERPRETER": "${BUILD_LITE_INTERPRETER}"

            #check the custom build flag
            echo "SELECTED_OP_LIST: ${SELECTED_OP_LIST}"
@ -539,7 +458,7 @@
          command: |
            set -e
            PROJ_ROOT=/Users/distiller/project
-            PROFILE=PyTorch_CI_2022
+            PROFILE=PyTorch_CI_2021
            # run the ruby build script
            if ! [ -x "$(command -v xcodebuild)" ]; then
              echo 'Error: xcodebuild is not installed.'
@ -567,28 +486,18 @@
            WORKSPACE=/Users/distiller/workspace
            PROJ_ROOT=/Users/distiller/project
            source ~/anaconda/bin/activate
-            # use the pytorch nightly build to generate models
-            conda install pytorch torchvision -c pytorch-nightly --yes
-            # generate models for differnet backends
+            pip install torch torchvision --progress-bar off
+            #run unit test
            cd ${PROJ_ROOT}/ios/TestApp/benchmark
-            mkdir -p ../models
            python trace_model.py
-            if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
-              ruby setup.rb --lite 1
-            else
-              ruby setup.rb
-            fi
+            ruby setup.rb
            cd ${PROJ_ROOT}/ios/TestApp
            instruments -s -devices
-            if [ ${BUILD_LITE_INTERPRETER} == 1 ]; then
-              fastlane scan --only_testing TestAppTests/TestAppTests/testLiteInterpreter
-            else
-              fastlane scan --only_testing TestAppTests/TestAppTests/testFullJIT
-            fi
+            fastlane scan
  pytorch_linux_bazel_build:
    <<: *pytorch_params
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -606,7 +515,7 @@

          echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"

-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
+          git submodule sync && git submodule update -q --init --recursive

          docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace

@ -617,7 +526,7 @@
          # Push intermediate Docker image for next phase to use
          if [ -z "${BUILD_ONLY}" ]; then
            # Augment our output image name with bazel to avoid collisions
-            output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
+            output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
            export COMMIT_DOCKER_IMAGE=$output_image
            docker commit "$id" ${COMMIT_DOCKER_IMAGE}
            time docker push ${COMMIT_DOCKER_IMAGE}
@ -626,7 +535,7 @@
  pytorch_linux_bazel_test:
    <<: *pytorch_params
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -637,7 +546,7 @@
        no_output_timeout: "90m"
        command: |
          set -e
-          output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
+          output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-bazel-${CIRCLE_SHA1}
          export COMMIT_DOCKER_IMAGE=$output_image
          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}

@ -667,26 +576,13 @@
    - store_test_results:
        path: bazel-testlogs

-  pytorch_windows_test_multigpu:
-    machine:
-      image: ubuntu-2004:202104-01
-    steps:
-      - checkout
-      - run:
-          name: Test
-          no_output_timeout: "90m"
-          command: |
-            set -e
-            python3 -m pip install requests
-            python3 ./.circleci/scripts/trigger_azure_pipeline.py
-
  pytorch_doc_test:
    environment:
      BUILD_ENVIRONMENT: pytorch-doc-test
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc5.4"
    resource_class: medium
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    - checkout
    - calculate_docker_image_tag
@ -697,7 +593,7 @@
        no_output_timeout: "30m"
        command: |
          set -ex
-          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+          export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
          echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
          time docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
          export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
--- a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@ -2,7 +2,7 @@ jobs:
  pytorch_linux_build:
    <<: *pytorch_params
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
    - checkout
@ -15,6 +15,9 @@ jobs:
        no_output_timeout: "1h"
        command: |
          set -e
+          if [[ "${DOCKER_IMAGE}" == *rocm3.9* ]]; then
+            export DOCKER_TAG="f3d89a32912f62815e4feaeed47e564e887dffd6"
+          fi
          if [[ ${BUILD_ENVIRONMENT} == *"pure_torch"* ]]; then
            echo 'BUILD_CAFFE2=OFF' >> "${BASH_ENV}"
          fi
@ -30,11 +33,11 @@ jobs:
          time docker pull ${DOCKER_IMAGE}:${DOCKER_TAG} >/dev/null
          export id=$(docker run --env-file "${BASH_ENV}" --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}:${DOCKER_TAG})

-          git submodule sync && git submodule update -q --init --recursive --depth 1 --jobs 0
+          git submodule sync && git submodule update -q --init --recursive

          docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace

-          export COMMAND='((echo "sudo chown -R jenkins workspace && export JOB_BASE_NAME="$CIRCLE_JOB" && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'
+          export COMMAND='((echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh && find ${BUILD_ROOT} -type f -name "*.a" -or -name "*.o" -delete") | docker exec -u jenkins -i "$id" bash) 2>&1'

          echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts

@ -47,7 +50,7 @@ jobs:
            # The xla build uses the same docker image as
            # pytorch_linux_bionic_py3_6_clang9_build. In the push step, we have to
            # distinguish between them so the test can pick up the correct image.
-            output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+            output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
            if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
              export COMMIT_DOCKER_IMAGE=$output_image-xla
            elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
@ -74,21 +77,13 @@ jobs:
            docker commit "$id" ${COMMIT_DOCKER_IMAGE}
            time docker push ${COMMIT_DOCKER_IMAGE}
          fi
-    - run:
-        name: upload build & binary data
-        no_output_timeout: "5m"
-        command: |
-            cd /pytorch && export COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
-            python3 -mpip install requests && \
-            SCRIBE_GRAPHQL_ACCESS_TOKEN=${SCRIBE_GRAPHQL_ACCESS_TOKEN} \
-            python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
    - store_artifacts:
        path: /home/circleci/project/dist

  pytorch_linux_test:
    <<: *pytorch_params
    machine:
-      image: ubuntu-2004:202104-01
+      image: ubuntu-1604:202007-01
    steps:
    # See Note [Workspace for CircleCI scripts] in job-specs-setup.yml
    - checkout
@ -105,7 +100,7 @@ jobs:
            export DOCKER_TAG="f3d89a32912f62815e4feaeed47e564e887dffd6"
          fi
          # See Note [Special build images]
-          output_image=${DOCKER_IMAGE}:build-${DOCKER_TAG}-${CIRCLE_SHA1}
+          output_image=${DOCKER_IMAGE}:${DOCKER_TAG}-${CIRCLE_SHA1}
          if [[ ${BUILD_ENVIRONMENT} == *"xla"* ]]; then
            export COMMIT_DOCKER_IMAGE=$output_image-xla
          elif [[ ${BUILD_ENVIRONMENT} == *"libtorch"* ]]; then
@ -158,14 +153,13 @@ jobs:
          }

          if is_vanilla_build; then
-            echo "apt-get update || apt-get install libgnutls30" | docker exec -u root -i "$id" bash
-            echo "apt-get install -y qemu-user gdb" | docker exec -u root -i "$id" bash
+            echo "apt-get update && apt-get install -y qemu-user gdb" | docker exec -u root -i "$id" bash
            echo "cd workspace/build; qemu-x86_64 -g 2345 -cpu Broadwell -E ATEN_CPU_CAPABILITY=default ./bin/basic --gtest_filter=BasicTest.BasicTestCPU & gdb ./bin/basic -ex 'set pagination off' -ex 'target remote :2345' -ex 'continue' -ex 'bt' -ex='set confirm off' -ex 'quit \$_isvoid(\$_exitcode)'" | docker exec -u jenkins -i "$id" bash
          else
            echo "Skipping for ${BUILD_ENVIRONMENT}"
          fi
    - run:
-        name: Test
+        name: Run tests
        no_output_timeout: "90m"
        command: |
          set -e
@ -174,16 +168,6 @@ jobs:
          # =================== The following code will be executed inside Docker container ===================
          set -ex
          export SCRIBE_GRAPHQL_ACCESS_TOKEN="${SCRIBE_GRAPHQL_ACCESS_TOKEN}"
-          export JOB_BASE_NAME="$CIRCLE_JOB"
-          # temporary fix for https://github.com/pytorch/pytorch/issues/60746
-          if [ -z "$CIRCLE_PR_NUMBER" ]; then
-            if [[ $CIRCLE_BRANCH =~ .*pull.* ]]; then
-              export PR_NUMBER="$(echo $CIRCLE_BRANCH | sed 's/[^0-9]//g')"
-              export CIRCLE_PR_NUMBER="$PR_NUMBER"
-            fi
-          else
-            export PR_NUMBER="$CIRCLE_PR_NUMBER"
-          fi
          ${PARALLEL_FLAGS}
          cd workspace
          EOL
@ -198,27 +182,11 @@ jobs:
          fi
          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
          unbuffer bash command.sh | ts
-
-          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* ]]; then
-              echo "Retrieving C++ coverage report"
-              docker cp $id:/var/lib/jenkins/workspace/build/coverage.info ./test
-          fi
-          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* || ${BUILD_ENVIRONMENT} == *"onnx"* ]]; then
-              echo "Retrieving Python coverage report"
-              docker cp $id:/var/lib/jenkins/workspace/test/.coverage ./test
-              docker cp $id:/var/lib/jenkins/workspace/test/coverage.xml ./test
-              python3 -mpip install codecov
-              python3 -mcodecov
-          fi
    - run:
        name: Report results
        no_output_timeout: "5m"
        command: |
          set -e
-          # Retrieving test results should be done as very first step as command never fails
-          # But is always executed if previous step fails for some reason
-          echo "Retrieving test reports"
-          docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
          docker stats --all --no-stream

          cat >docker_commands.sh \<<EOL
@ -230,20 +198,30 @@ jobs:
          export CIRCLE_SHA1="$CIRCLE_SHA1"
          export CIRCLE_PR_NUMBER="${CIRCLE_PR_NUMBER:-}"
          export CIRCLE_BRANCH="$CIRCLE_BRANCH"
-          export JOB_BASE_NAME="$CIRCLE_JOB"
+          export CIRCLE_JOB="$CIRCLE_JOB"
          export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID"
          cd workspace
-          python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+          python test/print_test_stats.py --upload-to-s3 test
          EOL
-          echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh
+          echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh
          unbuffer bash command.sh | ts
+
+          echo "Retrieving test reports"
+          docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
+          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* ]]; then
+              echo "Retrieving C++ coverage report"
+              docker cp $id:/var/lib/jenkins/workspace/build/coverage.info ./test
+          fi
+          if [[ ${BUILD_ENVIRONMENT} == *"coverage"* || ${BUILD_ENVIRONMENT} == *"onnx"* ]]; then
+              echo "Retrieving Python coverage report"
+              docker cp $id:/var/lib/jenkins/workspace/test/.coverage ./test
+              docker cp $id:/var/lib/jenkins/workspace/test/coverage.xml ./test
+              python3 -mpip install codecov
+              python3 -mcodecov
+          fi
        when: always
    - store_test_results:
        path: test-reports
-    - store_artifacts:
-        path: test/.coverage
-    - store_artifacts:
-        path: test/coverage.xml

  pytorch_windows_build:
    <<: *pytorch_windows_params
@ -262,10 +240,7 @@ jobs:
        default: "10.1"
      python_version:
        type: string
-        default: "3.8"
-      vs_version:
-        type: string
-        default: "16.8.6"
+        default: "3.6"
      vc_version:
        type: string
        default: "14.16"
@ -282,10 +257,10 @@ jobs:
    steps:
      - checkout
      - run:
-          name: Install VS2019 toolchain
-          no_output_timeout: 10m
+          name: _HACK_ Install CUDA compatible cmath
+          no_output_timeout: 1m
          command: |
-              powershell .circleci/scripts/vs_install.ps1
+              powershell .circleci/scripts/vs_install_cmath.ps1
      - run:
          name: Install Cuda
          no_output_timeout: 30m
@ -332,10 +307,7 @@ jobs:
        default: "10.1"
      python_version:
        type: string
-        default: "3.8"
-      vs_version:
-        type: string
-        default: "16.8.6"
+        default: "3.6"
      vc_version:
        type: string
        default: "14.16"
@ -353,11 +325,6 @@ jobs:
      - checkout
      - attach_workspace:
          at: c:/users/circleci/workspace
-      - run:
-          name: Install VS2019 toolchain
-          no_output_timeout: 10m
-          command: |
-              powershell .circleci/scripts/vs_install.ps1
      - run:
          name: Install Cuda
          no_output_timeout: 30m
@ -384,17 +351,5 @@ jobs:
            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
            set -x
            .jenkins/pytorch/win-test.sh
-      - run:
-          name: Report results
-          no_output_timeout: "5m"
-          command: |
-            set -ex
-            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_WIN_BUILD_V1}
-            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_WIN_BUILD_V1}
-            pip install typing_extensions boto3
-            python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
-          when: always
      - store_test_results:
          path: test/test-reports
-      - store_artifacts:
-          path: test/coverage.xml
--- a/.circleci/verbatim-sources/workflows/workflows-scheduled-ci.yml
+++ b/.circleci/verbatim-sources/workflows/workflows-scheduled-ci.yml
@ -1,37 +0,0 @@
-  # the following clones pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7's tests but enables
-  # slow tests and sets an environment variable so gradcheck runs with fast_mode=False
-  slow-gradcheck-scheduled-ci:
-    triggers:
-      - schedule:
-          # runs every 8 hours on the 45th minute
-          cron: "45 0,8,16 * * *"
-          filters:
-            branches:
-              only:
-                - master
-    jobs:
-      - docker_build_job:
-          name: "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-          image_name: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-      - pytorch_linux_build:
-          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
-          requires:
-            - "docker-pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-build"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-      - pytorch_linux_test:
-          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_old_gradcheck_test1
-          requires:
-            - periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
-          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-old-gradcheck-test1"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-          use_cuda_docker_runtime: "1"
-          resource_class: gpu.medium
-      - pytorch_linux_test:
-          name: periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_old_gradcheck_test2
-          requires:
-            - periodic_pytorch_xenial_cuda10_2_cudnn7_gcc7_build
-          build_environment: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7-old-gradcheck-test2"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
-          use_cuda_docker_runtime: "1"
-          resource_class: gpu.medium
--- a/.circleci/windows-jni/include/jni.h
+++ b/.circleci/windows-jni/include/jni.h
@ -1129,3 +1129,4 @@ JNIEXPORT void JNI_OnUnload(JavaVM* vm, void* reserved);
 #define JNI_ABORT       2           /* free buffer w/o copying back */

 #endif  /* JNI_H_ */
+
--- a/.clang-tidy
+++ b/.clang-tidy
@ -6,12 +6,8 @@ bugprone-*,
 -bugprone-forward-declaration-namespace,
 -bugprone-macro-parentheses,
 -bugprone-lambda-function-name,
-bugprone-reserved-identifier,
 cppcoreguidelines-*,
-cppcoreguidelines-avoid-magic-numbers,
-cppcoreguidelines-avoid-non-const-global-variables,
 -cppcoreguidelines-interfaces-global-init,
-cppcoreguidelines-macro-usage,
 -cppcoreguidelines-owning-memory,
 -cppcoreguidelines-pro-bounds-array-to-pointer-decay,
 -cppcoreguidelines-pro-bounds-constant-array-index,
@ -22,7 +18,6 @@ cppcoreguidelines-*,
 -cppcoreguidelines-pro-type-union-access,
 -cppcoreguidelines-pro-type-vararg,
 -cppcoreguidelines-special-member-functions,
-cppcoreguidelines-non-private-member-variables-in-classes,
 -facebook-hte-RelativeInclude,
 hicpp-exception-baseclass,
 hicpp-avoid-goto,
@ -35,10 +30,8 @@ modernize-*,
 -modernize-use-trailing-return-type,
 performance-*,
 -performance-noexcept-move-constructor,
-performance-unnecessary-value-param,
 '
 HeaderFilterRegex: 'torch/csrc/.*'
 AnalyzeTemporaryDtors: false
-WarningsAsErrors: '*'
 CheckOptions:
 ...
--- a/.coveragerc
+++ b/.coveragerc
@ -1,15 +0,0 @@
-[run]
-plugins =
-    coverage_plugins.jit_plugin
-omit =
-    */tmp*
-    */Temp/*
-    */usr/local/lib*
-    *test/*
-
-[report]
-omit =
-    */tmp*
-    */Temp/*
-    */usr/local/lib*
-    *test/*
--- a/.flake8
+++ b/.flake8
@ -4,7 +4,7 @@ max-line-length = 120
 # C408 ignored because we like the dict keyword argument syntax
 # E501 is not flexible enough, we're using B950 instead
 ignore =
-    E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
+    E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
    # to line this up with executable bit
    EXE001,
@ -13,20 +13,21 @@ ignore =
    # these ignores are from flake8-comprehensions; please fix!
    C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
 per-file-ignores = __init__.py: F401 torch/utils/cpp_extension.py: B950
-optional-ascii-coding = True
 exclude =
-    ./.git,
-    ./build_code_analyzer,
-    ./build_test_custom_build,
-    ./build,
-    ./caffe2,
-    ./docs/caffe2,
-    ./docs/cpp/src,
-    ./docs/src,
-    ./scripts,
-    ./test/generated_type_hints_smoketest.py,
-    ./third_party,
-    ./torch/include,
-    ./torch/lib,
-    ./venv,
-    *.pyi
+    docs/src,
+    docs/cpp/src,
+    venv,
+    third_party,
+    caffe2,
+    scripts,
+    docs/caffe2,
+    torch/lib/include,
+    torch/lib/tmp_install,
+    build,
+    torch/include,
+    *.pyi,
+    .git,
+    build,
+    build_test_custom_build,
+    build_code_analyzer,
+    test/generated_type_hints_smoketest.py
--- a/.gdbinit
+++ b/.gdbinit
@ -1,14 +0,0 @@
-# automatically load the pytoch-gdb extension.
-#
-# gdb automatically tries to load this file whenever it is executed from the
-# root of the pytorch repo, but by default it is not allowed to do so due to
-# security reasons. If you want to use pytorch-gdb, please add the following
-# line to your ~/.gdbinit (i.e., the .gdbinit file which is in your home
-# directory, NOT this file):
-#    add-auto-load-safe-path /path/to/pytorch/.gdbinit
-#
-# Alternatively, you can manually load the pytorch-gdb commands into your
-# existing gdb session by doing the following:
-#    (gdb) source /path/to/pytorch/tools/gdb/pytorch-gdb.py
-
-source tools/gdb/pytorch-gdb.py
--- a/.gitattributes
+++ b/.gitattributes
@ -1,4 +1 @@
-*.bat text eol=crlf
-.circleci/config.yml linguist-generated=true
-.github/workflows/generated-*.yml linguist-generated=true
-.github/generated-* linguist-generated=true
+*.bat	text eol=crlf
--- a/.github/ISSUE_TEMPLATE/feature-request.md
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@ -1,5 +1,5 @@
 ---
-name: "\U0001F680 Feature Request"
+name: "\U0001F680Feature Request"
 about: Submit a proposal/request for a new PyTorch feature

 ---
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -1,8 +0,0 @@
-self-hosted-runner:
-  labels:
-    - linux.2xlarge
-    - linux.8xlarge.nvidia.gpu
-    - linux.16xlarge.nvidia.gpu
-    - windows.4xlarge
-    - windows.8xlarge.nvidia.gpu
-    - bm-runner
--- a/.github/generated-ciflow-ruleset.json
+++ b/.github/generated-ciflow-ruleset.json
@ -1,102 +0,0 @@
-{
-  "__comment": "@generated DO NOT EDIT MANUALLY, Generation script: .github/scripts/generate_ci_workflows.py",
-  "label_rules": {
-    "ciflow/all": [
-      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
-      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.6-clang9",
-      "linux-bionic-py3.8-gcc9-coverage",
-      "linux-xenial-cuda10.2-py3.6-gcc7",
-      "linux-xenial-cuda11.3-py3.6-gcc7",
-      "linux-xenial-py3.6-gcc5.4",
-      "linux-xenial-py3.6-gcc7-bazel-test",
-      "parallelnative-linux-xenial-py3.6-gcc5.4",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
-      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
-      "periodic-win-vs2019-cuda11.1-py3",
-      "puretorch-linux-xenial-py3.6-gcc5.4",
-      "win-vs2019-cpu-py3",
-      "win-vs2019-cuda10.2-py3",
-      "win-vs2019-cuda11.3-py3"
-    ],
-    "ciflow/bazel": [
-      "linux-xenial-py3.6-gcc7-bazel-test"
-    ],
-    "ciflow/coverage": [
-      "linux-bionic-py3.8-gcc9-coverage"
-    ],
-    "ciflow/cpu": [
-      "linux-bionic-py3.6-clang9",
-      "linux-bionic-py3.8-gcc9-coverage",
-      "linux-xenial-py3.6-gcc5.4",
-      "linux-xenial-py3.6-gcc7-bazel-test",
-      "parallelnative-linux-xenial-py3.6-gcc5.4",
-      "puretorch-linux-xenial-py3.6-gcc5.4",
-      "win-vs2019-cpu-py3"
-    ],
-    "ciflow/cuda": [
-      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
-      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-xenial-cuda10.2-py3.6-gcc7",
-      "linux-xenial-cuda11.3-py3.6-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
-      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
-      "periodic-win-vs2019-cuda11.1-py3",
-      "win-vs2019-cuda10.2-py3",
-      "win-vs2019-cuda11.3-py3"
-    ],
-    "ciflow/default": [
-      "linux-bionic-py3.6-clang9",
-      "linux-bionic-py3.8-gcc9-coverage",
-      "linux-xenial-cuda11.3-py3.6-gcc7",
-      "linux-xenial-py3.6-gcc5.4",
-      "linux-xenial-py3.6-gcc7-bazel-test",
-      "win-vs2019-cpu-py3",
-      "win-vs2019-cuda11.3-py3"
-    ],
-    "ciflow/libtorch": [
-      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7"
-    ],
-    "ciflow/linux": [
-      "libtorch-linux-xenial-cuda10.2-py3.6-gcc7",
-      "libtorch-linux-xenial-cuda11.3-py3.6-gcc7",
-      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-bionic-py3.6-clang9",
-      "linux-bionic-py3.8-gcc9-coverage",
-      "linux-xenial-cuda10.2-py3.6-gcc7",
-      "linux-xenial-cuda11.3-py3.6-gcc7",
-      "linux-xenial-py3.6-gcc5.4",
-      "linux-xenial-py3.6-gcc7-bazel-test",
-      "parallelnative-linux-xenial-py3.6-gcc5.4",
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
-      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
-      "puretorch-linux-xenial-py3.6-gcc5.4"
-    ],
-    "ciflow/noarch": [
-      "linux-bionic-py3.6-clang9"
-    ],
-    "ciflow/scheduled": [
-      "periodic-libtorch-linux-xenial-cuda11.1-py3.6-gcc7",
-      "periodic-linux-xenial-cuda11.1-py3.6-gcc7",
-      "periodic-win-vs2019-cuda11.1-py3"
-    ],
-    "ciflow/slow": [
-      "linux-bionic-cuda10.2-py3.9-gcc7",
-      "linux-xenial-cuda10.2-py3.6-gcc7"
-    ],
-    "ciflow/win": [
-      "periodic-win-vs2019-cuda11.1-py3",
-      "win-vs2019-cpu-py3",
-      "win-vs2019-cuda10.2-py3",
-      "win-vs2019-cuda11.3-py3"
-    ],
-    "ciflow/xla": [
-      "linux-bionic-py3.6-clang9"
-    ]
-  },
-  "version": "v1"
-}
--- a/.github/pytorch-circleci-labels.yml
+++ b/.github/pytorch-circleci-labels.yml
@ -11,11 +11,3 @@ labels_to_circle_params:
        - v[0-9]+(\.[0-9]+)*-rc[0-9]+
    set_to_false:
      - run_build
-  ci/master:
-    parameter: run_master_build
-    set_to_false:
-      - run_build
-  ci/slow-gradcheck:
-    parameter: run_slow_gradcheck_build
-    set_to_false:
-      - run_build
--- a/.github/pytorch-probot.yml
+++ b/.github/pytorch-probot.yml
@ -1,2 +1 @@
 tracking_issue: 24422
-ciflow_tracking_issue: 64124
--- a/.github/regenerate.sh
+++ b/.github/regenerate.sh
@ -1,6 +0,0 @@
-#!/bin/bash -e
-
-# Allows this script to be invoked from any directory:
-cd "$(dirname "$0")"
-
-python3 scripts/generate_ci_workflows.py
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .2.1
 .1.0