Move GHA to ONNX (#65975)

Summary: - Delete CircleCI ONNX config - Add sharded ONNX job to the list of generated workflows - Move ONNX runtime installation from `pytorch-job-specs.yml` to `.jenkins/caffe2/test.sh` - Limit MKLDNN to AVX2 ISA while running Caffe2 tests Pull Request resolved: https://github.com/pytorch/pytorch/pull/65975 Reviewed By: seemethere Differential Revision: D31327206 Pulled By: malfet fbshipit-source-id: 15aa53e4481e846c62b4ee2db5c03047d68679a4
2025-10-20 21:14:14 +08:00 · 2021-10-05 09:29:27 -07:00
parent df475aa1dc
commit 1932bc69e9
17 changed files with 563 additions and 41 deletions
--- a/.circleci/cimodel/data/pytorch_build_data.py
+++ b/.circleci/cimodel/data/pytorch_build_data.py
@ -12,13 +12,6 @@ CONFIG_TREE_DATA = [
            # TODO: bring back libtorch test
            ("7", [X("3.6")]),
        ]),
-        ("clang", [
-            ("7", [
-                ("3.6", [
-                    ("onnx", [XImportant(True)]),
-                ]),
-            ]),
-        ]),
        ("cuda", [
            ("10.2", [
                ("3.6", [
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -664,8 +664,6 @@ jobs:
          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
            echo ".jenkins/pytorch/multigpu-test.sh" >> docker_commands.sh
          elif [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
-            echo "pip install click mock tabulate networkx==2.0" >> docker_commands.sh
-            echo "pip -q install --user \"file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx\"" >> docker_commands.sh
            echo ".jenkins/caffe2/test.sh" >> docker_commands.sh
          else
            echo ".jenkins/pytorch/test.sh" >> docker_commands.sh
@ -6620,26 +6618,6 @@ workflows:
          build_environment: "pytorch-linux-pytorch_linux_xenial_py3_6_gcc7_distributed-test"
          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-gcc7"
          resource_class: large
-      - pytorch_linux_build:
-          name: pytorch_linux_xenial_py3_clang7_onnx_build
-          requires:
-            - "docker-pytorch-linux-xenial-py3-clang7-onnx"
-          build_environment: "pytorch-linux-xenial-py3-clang7-onnx-build"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-onnx"
-      - pytorch_linux_test:
-          name: pytorch_linux_xenial_py3_clang7_onnx_ort_test1
-          requires:
-            - pytorch_linux_xenial_py3_clang7_onnx_build
-          build_environment: "pytorch-linux-xenial-py3-clang7-onnx-ort_test1"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-onnx"
-          resource_class: large
-      - pytorch_linux_test:
-          name: pytorch_linux_xenial_py3_clang7_onnx_ort_test2
-          requires:
-            - pytorch_linux_xenial_py3_clang7_onnx_build
-          build_environment: "pytorch-linux-xenial-py3-clang7-onnx-ort_test2"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-onnx"
-          resource_class: large
      - pytorch_linux_build:
          name: pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_build
          requires:
@ -8409,9 +8387,6 @@ workflows:
      - docker_build_job:
          name: "docker-pytorch-linux-xenial-py3-clang5-asan"
          image_name: "pytorch-linux-xenial-py3-clang5-asan"
-      - docker_build_job:
-          name: "docker-pytorch-linux-xenial-py3-clang7-onnx"
-          image_name: "pytorch-linux-xenial-py3-clang7-onnx"
      - docker_build_job:
          name: "docker-pytorch-linux-xenial-py3.6-gcc5.4"
          image_name: "pytorch-linux-xenial-py3.6-gcc5.4"
--- a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
+++ b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml
@ -190,8 +190,6 @@ jobs:
          if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
            echo ".jenkins/pytorch/multigpu-test.sh" >> docker_commands.sh
          elif [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
-            echo "pip install click mock tabulate networkx==2.0" >> docker_commands.sh
-            echo "pip -q install --user \"file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx\"" >> docker_commands.sh
            echo ".jenkins/caffe2/test.sh" >> docker_commands.sh
          else
            echo ".jenkins/pytorch/test.sh" >> docker_commands.sh
--- a/.github/generated-ciflow-ruleset.json
+++ b/.github/generated-ciflow-ruleset.json
@ -9,6 +9,7 @@
      "linux-xenial-cuda10.2-py3.6-gcc7",
      "linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-xenial-py3.6-clang7-asan",
+      "linux-xenial-py3.6-clang7-onnx",
      "linux-xenial-py3.6-gcc5.4",
      "linux-xenial-py3.6-gcc7-bazel-test",
      "parallelnative-linux-xenial-py3.6-gcc5.4",
@ -25,6 +26,7 @@
    "ciflow/cpu": [
      "linux-bionic-py3.6-clang9",
      "linux-xenial-py3.6-clang7-asan",
+      "linux-xenial-py3.6-clang7-onnx",
      "linux-xenial-py3.6-gcc5.4",
      "linux-xenial-py3.6-gcc7-bazel-test",
      "parallelnative-linux-xenial-py3.6-gcc5.4",
@ -46,6 +48,7 @@
      "linux-bionic-py3.6-clang9",
      "linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-xenial-py3.6-clang7-asan",
+      "linux-xenial-py3.6-clang7-onnx",
      "linux-xenial-py3.6-gcc5.4",
      "linux-xenial-py3.6-gcc7-bazel-test",
      "win-vs2019-cpu-py3",
@ -64,6 +67,7 @@
      "linux-xenial-cuda10.2-py3.6-gcc7",
      "linux-xenial-cuda11.3-py3.6-gcc7",
      "linux-xenial-py3.6-clang7-asan",
+      "linux-xenial-py3.6-clang7-onnx",
      "linux-xenial-py3.6-gcc5.4",
      "linux-xenial-py3.6-gcc7-bazel-test",
      "parallelnative-linux-xenial-py3.6-gcc5.4",
@ -74,6 +78,9 @@
    "ciflow/noarch": [
      "linux-bionic-py3.6-clang9"
    ],
+    "ciflow/onnx": [
+      "linux-xenial-py3.6-clang7-onnx"
+    ],
    "ciflow/sanitizers": [
      "linux-xenial-py3.6-clang7-asan"
    ],
--- a/.github/scripts/generate_ci_workflows.py
+++ b/.github/scripts/generate_ci_workflows.py
@ -47,6 +47,7 @@ LABEL_CIFLOW_DEFAULT = "ciflow/default"
 LABEL_CIFLOW_LIBTORCH = "ciflow/libtorch"
 LABEL_CIFLOW_LINUX = "ciflow/linux"
 LABEL_CIFLOW_SANITIZERS = "ciflow/sanitizers"
+LABEL_CIFLOW_ONNX = "ciflow/onnx"
 LABEL_CIFLOW_SCHEDULED = "ciflow/scheduled"
 LABEL_CIFLOW_SLOW = "ciflow/slow"
 LABEL_CIFLOW_WIN = "ciflow/win"
@ -329,18 +330,24 @@ LINUX_WORKFLOWS = [
            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_SANITIZERS, LABEL_CIFLOW_CPU},
        ),
    ),
+    CIWorkflow(
+        arch="linux",
+        build_environment="linux-xenial-py3.6-clang7-onnx",
+        docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
+        test_runner_type=LINUX_CPU_TEST_RUNNER,
+        num_test_shards=2,
+        distributed_test=False,
+        ciflow_config=CIFlowConfig(
+            enabled=True,
+            labels={LABEL_CIFLOW_DEFAULT, LABEL_CIFLOW_LINUX, LABEL_CIFLOW_ONNX, LABEL_CIFLOW_CPU},
+        ),
+    ),
    # CIWorkflow(
    #     arch="linux",
    #     build_environment="linux-xenial-py3.6-gcc7",
    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3.6-gcc7",
    #     test_runner_type=LINUX_CPU_TEST_RUNNER,
    # ),
-    # CIWorkflow(
-    #     arch="linux",
-    #     build_environment="linux-xenial-py3.6-clang7-onnx",
-    #     docker_image_base=f"{DOCKER_REGISTRY}/pytorch/pytorch-linux-xenial-py3-clang7-onnx",
-    #     test_runner_type=LINUX_CPU_TEST_RUNNER,
-    # ),
    CIWorkflow(
        arch="linux",
        build_environment="linux-bionic-cuda10.2-py3.9-gcc7",
--- a/.github/templates/linux_ci_workflow.yml.j2
+++ b/.github/templates/linux_ci_workflow.yml.j2
@ -233,6 +233,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-linux-bionic-cuda10.2-py3.9-gcc7.yml
+++ b/.github/workflows/generated-linux-bionic-cuda10.2-py3.9-gcc7.yml
@ -396,6 +396,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-linux-bionic-py3.6-clang9.yml
+++ b/.github/workflows/generated-linux-bionic-py3.6-clang9.yml
@ -396,6 +396,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-linux-xenial-cuda10.2-py3.6-gcc7.yml
+++ b/.github/workflows/generated-linux-xenial-cuda10.2-py3.6-gcc7.yml
@ -396,6 +396,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-linux-xenial-cuda11.3-py3.6-gcc7.yml
+++ b/.github/workflows/generated-linux-xenial-cuda11.3-py3.6-gcc7.yml
@ -396,6 +396,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-linux-xenial-py3.6-clang7-asan.yml
+++ b/.github/workflows/generated-linux-xenial-py3.6-clang7-asan.yml
@ -396,6 +396,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-linux-xenial-py3.6-clang7-onnx.yml
+++ b/.github/workflows/generated-linux-xenial-py3.6-clang7-onnx.yml
@ -0,0 +1,514 @@
+# @generated DO NOT EDIT MANUALLY
+# Template is at:    .github/templates/linux_ci_workflow.yml.j2
+# Generation script: .github/scripts/generate_ci_workflows.py
+name: linux-xenial-py3.6-clang7-onnx
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, unassigned]
+  push:
+    branches:
+      - master
+      - release/*
+  workflow_dispatch:
+
+env:
+  BUILD_ENVIRONMENT: linux-xenial-py3.6-clang7-onnx
+  DOCKER_IMAGE_BASE: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang7-onnx
+  SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
+  XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
+  TORCH_CUDA_ARCH_LIST: 5.2
+  IN_CI: 1
+  IS_GHA: 1
+  # This is used for the phase of adding wheel tests only, will be removed once completed
+  IN_WHEEL_TEST: 1
+  # Used for custom_opertor, jit_hooks, custom_backend, see .jenkins/pytorch/build.sh
+  CUSTOM_TEST_ARTIFACT_BUILD_DIR: build/custom_test_artifacts
+  ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
+  PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
+  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  AWS_DEFAULT_REGION: us-east-1
+  CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+  CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+concurrency:
+  group: linux-xenial-py3.6-clang7-onnx-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+
+  ciflow_should_run:
+    runs-on: ubuntu-18.04
+    env:
+      IS_PROBOT_TRIGGER_EVENT: ${{ (github.event.action == 'unassigned') && (github.event.assigneed.login == 'pytorchbot') }}
+      LABEL_CONDITIONS: ${{ contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/onnx') }}
+      LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
+    if: ${{ (github.repository == 'pytorch/pytorch') && (
+            (github.event_name == 'push') ||
+            (contains(github.event.pull_request.labels.*.name, 'ciflow/all') || contains(github.event.pull_request.labels.*.name, 'ciflow/cpu') || contains(github.event.pull_request.labels.*.name, 'ciflow/default') || contains(github.event.pull_request.labels.*.name, 'ciflow/linux') || contains(github.event.pull_request.labels.*.name, 'ciflow/onnx')) ||
+            ((github.event_name == 'pull_request' && github.event.action != 'unassigned') && !contains(join(github.event.pull_request.labels.*.name), 'ciflow/')))
+         }}
+    steps:
+      - name: noop
+        run: echo running ciflow_should_run
+      - name: print labels
+        run: echo "${LABELS}"
+
+  build:
+    runs-on: linux.2xlarge
+    needs: [ciflow_should_run]
+    env:
+      JOB_BASE_NAME: linux-xenial-py3.6-clang7-onnx-build
+    outputs:
+      docker_image: ${{ steps.calculate-tag.outputs.docker_image }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
+          bash /tmp/ecr-login.sh
+          rm /tmp/ecr-login.sh
+      - name: Chown workspace
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
+        run: |
+          retry () {
+              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE:?}/*"
+          rm -f ~/.ssh/authorized_keys
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Calculate docker image tag
+        id: calculate-tag
+        run: |
+          DOCKER_TAG=$(git rev-parse HEAD:.circleci/docker)
+          echo "DOCKER_TAG=${DOCKER_TAG}" >> "${GITHUB_ENV}"
+          echo "DOCKER_IMAGE=${DOCKER_IMAGE_BASE}:${DOCKER_TAG}" >> "${GITHUB_ENV}"
+          echo "::set-output name=docker_tag::${DOCKER_TAG}"
+          echo "::set-output name=docker_image::${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"
+      - name: Check if image should be built
+        id: check
+        env:
+          BASE_REVISION: ${{ github.event.pull_request.base.sha || github.sha }}
+        run: |
+          set -x
+          # Check if image already exists, if it does then skip building it
+          if docker manifest inspect "${DOCKER_IMAGE_BASE}:${DOCKER_TAG}"; then
+            exit 0
+          fi
+          if [[ "$BASE_REVISION" = "$(git rev-parse HEAD)" ]]; then
+            # if we're on the base branch then use the parent commit
+            MERGE_BASE=$(git rev-parse HEAD~)
+          else
+            # otherwise we're on a PR, so use the most recent base commit
+            MERGE_BASE=$(git merge-base HEAD "$BASE_REVISION")
+          fi
+          # Covers the case where a previous tag doesn't exist for the tree
+          # this is only really applicable on trees that don't have `.circleci/docker` at its merge base, i.e. nightly
+          if ! git rev-parse "$MERGE_BASE:.circleci/docker"; then
+            echo "Directory '.circleci/docker' not found in commit $MERGE_BASE, you should probably rebase onto a more recent commit"
+            exit 1
+          fi
+          PREVIOUS_DOCKER_TAG=$(git rev-parse "$MERGE_BASE:.circleci/docker")
+          # If no image exists but the hash is the same as the previous hash then we should error out here
+          if [[ "${PREVIOUS_DOCKER_TAG}" = "${DOCKER_TAG}" ]]; then
+            echo "ERROR: Something has gone wrong and the previous image isn't available for the merge-base of your branch"
+            echo "       contact the PyTorch team to restore the original images"
+            exit 1
+          fi
+          echo ::set-output name=rebuild::yes
+      - name: Build and push docker image
+        if: ${{ steps.check.outputs.rebuild }}
+        env:
+          DOCKER_SKIP_S3_UPLOAD: 1
+        working-directory: .circleci/docker
+        run: |
+          export IMAGE_NAME=${DOCKER_IMAGE_BASE#308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/}
+          ./build_docker.sh
+      - name: Pull Docker image
+        run: |
+          retry () {
+              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Build
+        env:
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        run: |
+          # detached container should get cleaned up by teardown_ec2_linux
+          container_name=$(docker run \
+            -e BUILD_ENVIRONMENT \
+            -e JOB_BASE_NAME \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e AWS_DEFAULT_REGION \
+            -e IS_GHA \
+            -e CIRCLE_PR_NUMBER \
+            -e CIRCLE_SHA1 \
+            -e CIRCLE_BRANCH \
+            -e GITHUB_RUN_ID \
+            -e SCCACHE_BUCKET \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e SKIP_SCCACHE_INITIALIZATION=1 \
+            -e TORCH_CUDA_ARCH_LIST \
+            -e PR_LABELS \
+            -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --tty \
+            --detach \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c 'sudo chown -R jenkins . && .jenkins/pytorch/build.sh'
+      - name: Display and upload binary build size statistics (Click Me)
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
+        run: |
+          COMMIT_TIME=$(git log --max-count=1 --format=%ct || echo 0)
+          export COMMIT_TIME
+          pip3 install requests==2.26 boto3==1.16.34
+          python3 -m tools.stats.upload_binary_size_to_scuba || exit 0
+      - name: Chown workspace
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Archive artifacts into zip
+        run: |
+          zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .pytorch-test-times.json
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store PyTorch Build Artifacts on S3
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            artifacts.zip
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Clean up docker images
+        if: always()
+        run: |
+          # Prune all of the docker images
+          docker system prune -af
+
+  generate-test-matrix:
+    runs-on: ubuntu-18.04
+    needs: [ciflow_should_run]
+    env:
+      TEST_RUNNER_TYPE: linux.2xlarge
+      ENABLE_DISTRIBUTED_TEST: ''
+      ENABLE_JIT_LEGACY_TEST: ''
+      ENABLE_MULTIGPU_TEST: ''
+      ENABLE_NOGPU_NO_AVX_TEST: ''
+      ENABLE_NOGPU_NO_AVX2_TEST: ''
+      ENABLE_SLOW_TEST: ''
+      ENABLE_DOCS_TEST: ''
+      ENABLE_BACKWARDS_COMPAT_TEST: ''
+      ENABLE_XLA_TEST: ''
+      ENABLE_NOARCH_TEST: ''
+      NUM_TEST_SHARDS: 2
+      MULTIGPU_RUNNER_TYPE: linux.16xlarge.nvidia.gpu
+      NOGPU_RUNNER_TYPE: linux.2xlarge
+      PR_BODY: ${{ github.event.pull_request.body }}
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+      render-matrix: ${{ steps.set-matrix.outputs.render-matrix }}
+      ignore-disabled-issues: ${{ steps.set-matrix.outputs.ignore-disabled-issues }}
+    container:
+      image: python:3.9
+    steps:
+      - name: Install dependencies
+        run: pip install typing-extensions==3.10
+      - name: Clone pytorch/pytorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+      - name: Generating test matrix
+        id: set-matrix
+        run: .github/scripts/generate_pytorch_test_matrix.py
+
+  test:
+    needs: [build, generate-test-matrix, ciflow_should_run]
+    strategy:
+      matrix: ${{ fromJson(needs.generate-test-matrix.outputs.matrix) }}
+      fail-fast: false
+    runs-on: ${{ matrix.runner }}
+    env:
+      DOCKER_IMAGE: ${{ needs.build.outputs.docker_image }}
+      JOB_BASE_NAME: linux-xenial-py3.6-clang7-onnx-test
+      TEST_CONFIG: ${{ matrix.config }}
+      SHARD_NUMBER: ${{ matrix.shard }}
+      NUM_TEST_SHARDS: ${{ matrix.num_shards }}
+      PYTORCH_IGNORE_DISABLED_ISSUES: ${{ needs.generate-test-matrix.outputs.ignore-disabled-issues }}
+    steps:
+      - name: Display EC2 information
+        shell: bash
+        run: |
+          set -euo pipefail
+          function get_ec2_metadata() {
+            # Pulled from instance metadata endpoint for EC2
+            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+            category=$1
+            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
+          }
+          echo "ami-id: $(get_ec2_metadata ami-id)"
+          echo "instance-id: $(get_ec2_metadata instance-id)"
+          echo "instance-type: $(get_ec2_metadata instance-type)"
+      - name: Log in to ECR
+        env:
+          AWS_RETRY_MODE: standard
+          AWS_MAX_ATTEMPTS: 5
+        run: |
+          aws ecr get-login --no-include-email --region us-east-1 > /tmp/ecr-login.sh
+          bash /tmp/ecr-login.sh
+          rm /tmp/ecr-login.sh
+      - name: Chown workspace
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
+        run: |
+          retry () {
+              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${ALPINE_IMAGE}"
+          # Ensure the working directory gets chowned back to the current user
+          docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Clean workspace
+        run: |
+          rm -rf "${GITHUB_WORKSPACE:?}/*"
+          rm -f ~/.ssh/authorized_keys
+      - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
+        uses: seemethere/add-github-ssh-key@v1
+        with:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Preserve github env variables for use in docker
+        run: |
+          env | grep '^GITHUB' > "/tmp/github_env_${GITHUB_RUN_ID}"
+      - name: Checkout PyTorch
+        uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
+        with:
+          # deep clone, to allow use of git merge-base
+          fetch-depth: 0
+          submodules: recursive
+      - name: Pull Docker image
+        run: |
+          retry () {
+              "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@")
+          }
+          retry docker pull "${DOCKER_IMAGE}"
+      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
+        if: ${{ contains(env.BUILD_ENVIRONMENT, 'cuda') && !contains(matrix.config, 'nogpu') }}
+        run: |
+          bash .github/scripts/install_nvidia_utils_linux.sh
+          echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
+      - name: Determine shm-size
+        run: |
+          shm_size="1g"
+          case "${BUILD_ENVIRONMENT}" in
+            *cuda*)
+              shm_size="2g"
+              ;;
+            *rocm*)
+              shm_size="8g"
+              ;;
+          esac
+          echo "SHM_SIZE=${shm_size}" >> "${GITHUB_ENV}"
+      - uses: seemethere/download-artifact-s3@0504774707cbc8603d7dca922e8026eb8bf3b47b
+        name: Download PyTorch Build Artifacts
+        with:
+          name: ${{ env.BUILD_ENVIRONMENT }}
+      - name: Unzip artifacts
+        run: |
+          unzip -o artifacts.zip
+      - name: Output disk space left
+        run: |
+          sudo df -H
+      - name: Parse ref
+        id: parse-ref
+        run: .github/scripts/parse_ref.py
+      - name: Test
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+        # Time out the test phase after 3.5 hours
+        timeout-minutes: 210
+        run: |
+          if [[ $TEST_CONFIG == 'multigpu' ]]; then
+            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
+          else
+            TEST_COMMAND=.jenkins/pytorch/test.sh
+          fi
+          # detached container should get cleaned up by teardown_ec2_linux
+          # TODO: Stop building test binaries as part of the build phase
+          # Used for GPU_FLAG since that doesn't play nice
+          # shellcheck disable=SC2086
+          container_name=$(docker run \
+            ${GPU_FLAG:-} \
+            -e BUILD_ENVIRONMENT \
+            -e PR_NUMBER \
+            -e CUSTOM_TEST_ARTIFACT_BUILD_DIR \
+            -e GITHUB_ACTIONS \
+            -e IN_CI \
+            -e IS_GHA \
+            -e CIRCLE_BRANCH \
+            -e CIRCLE_SHA1 \
+            -e CIRCLE_PR_NUMBER \
+            -e AWS_DEFAULT_REGION \
+            -e IN_WHEEL_TEST \
+            -e SHARD_NUMBER \
+            -e JOB_BASE_NAME \
+            -e TEST_CONFIG \
+            -e NUM_TEST_SHARDS \
+            -e PYTORCH_IGNORE_DISABLED_ISSUES \
+            -e PR_LABELS \
+            -e MAX_JOBS="$(nproc --ignore=2)" \
+            -e SCCACHE_BUCKET \
+            -e http_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e https_proxy="http://internal-tf-lb-20210727220640487900000002-835786077.us-east-1.elb.amazonaws.com:3128" -e no_proxy="localhost,127.0.0.1,github.com,amazonaws.com,s3.amazonaws.com,169.254.169.254,169.254.170.2,/var/run/docker.sock" \
+            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
+            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
+            --ulimit stack=10485760:83886080 \
+            --security-opt seccomp=unconfined \
+            --cap-add=SYS_PTRACE \
+            --shm-size="${SHM_SIZE}" \
+            --tty \
+            --detach \
+            --name="${container_name}" \
+            --user jenkins \
+            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
+            -w /var/lib/jenkins/workspace \
+            "${DOCKER_IMAGE}"
+          )
+          docker exec -t "${container_name}" sh -c "sudo chown -R jenkins . && pip install dist/*.whl && ${TEST_COMMAND}"
+      - name: Chown workspace
+        if: always()
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Install render_test_results dependencies
+        if: always()
+        shell: bash
+        run: |
+          python3 -m pip install junitparser==2.1.1 rich==10.9.0
+      - name: "[[ Click me for rendered test results (useful for finding failing tests) ]]"
+        if: always()
+        shell: bash
+        # Encoding is weird on windows, just try to default to utf-8 if possible
+        env:
+          PYTHONIOENCODING: "utf-8"
+        run: |
+          python3 tools/render_junit.py test/
+      - name: Zip test reports for upload
+        if: always()
+        env:
+          FILE_SUFFIX: '${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}'
+        run: |
+          # Remove any previous test reports if they exist
+          rm -f test-reports-*.zip
+          zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml'
+      - uses: seemethere/upload-artifact-s3@v3
+        name: Store Test Reports on S3
+        if: always()
+        with:
+          retention-days: 14
+          if-no-files-found: error
+          path:
+            test-reports-*.zip
+      - name: Display and upload test statistics (Click Me)
+        if: always()
+        # temporary hack: set CIRCLE_* vars, until we update
+        # tools/stats/print_test_stats.py to natively support GitHub Actions
+        env:
+          AWS_DEFAULT_REGION: us-east-1
+          CIRCLE_BRANCH: ${{ steps.parse-ref.outputs.branch }}
+          JOB_BASE_NAME: linux-xenial-py3.6-clang7-onnx-test
+          CIRCLE_PR_NUMBER: ${{ github.event.pull_request.number }}
+          CIRCLE_SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
+          CIRCLE_TAG: ${{ steps.parse-ref.outputs.tag }}
+          CIRCLE_WORKFLOW_ID: '${{ github.run_id }}_${{ github.run_number }}'
+        shell: bash
+        run: |
+          python3 -m pip install -r requirements.txt
+          python3 -m pip install boto3==1.16.34
+          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
+      - name: Hold runner for 2 hours or until ssh sessions have drained
+        # Always hold for active ssh sessions
+        if: always()
+        run: .github/scripts/wait_for_ssh_to_drain.sh
+      - name: Chown workspace
+        if: always()
+        env:
+          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
+        run: |
+          # Ensure the working directory gets chowned back to the current user
+          docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
+      - name: Kill containers, clean up images
+        if: always()
+        run: |
+          # ignore expansion of "docker ps -q" since it could be empty
+          # shellcheck disable=SC2046
+          docker stop $(docker ps -q) || true
+          # Prune all of the docker images
+          docker system prune -af
--- a/.github/workflows/generated-linux-xenial-py3.6-gcc5.4.yml
+++ b/.github/workflows/generated-linux-xenial-py3.6-gcc5.4.yml
@ -396,6 +396,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-parallelnative-linux-xenial-py3.6-gcc5.4.yml
+++ b/.github/workflows/generated-parallelnative-linux-xenial-py3.6-gcc5.4.yml
@ -396,6 +396,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.github/workflows/generated-periodic-linux-xenial-cuda11.1-py3.6-gcc7.yml
+++ b/.github/workflows/generated-periodic-linux-xenial-cuda11.1-py3.6-gcc7.yml
@ -394,6 +394,8 @@ jobs:
        run: |
          if [[ $TEST_CONFIG == 'multigpu' ]]; then
            TEST_COMMAND=.jenkins/pytorch/multigpu-test.sh
+          elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
+            TEST_COMMAND=.jenkins/caffe2/test.sh
          else
            TEST_COMMAND=.jenkins/pytorch/test.sh
          fi
--- a/.jenkins/caffe2/test.sh
+++ b/.jenkins/caffe2/test.sh
@ -3,6 +3,11 @@
 # shellcheck source=./common.sh
 source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

+if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
+  pip install click mock tabulate networkx==2.0
+  pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
+fi
+
 # Skip tests in environments where they are not built/applicable
 if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
  echo 'Skipping tests'
@ -143,6 +148,9 @@ if [[ "$BUILD_ENVIRONMENT" == *py3* ]]; then
  done
 fi

+# Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
+export DNNL_MAX_CPU_ISA=AVX2
+
 pip install --user pytest-sugar
 "$PYTHON" \
  -m pytest \
--- a/scripts/onnx/test.sh
+++ b/scripts/onnx/test.sh
@ -87,4 +87,6 @@ if [[ "$BUILD_ENVIRONMENT" == *ort_test2* ]]; then
 fi

 # Our CI expects both coverage.xml and .coverage to be within test/
-mv .coverage test/.coverage
+if [ -d .coverage ]; then
+  mv .coverage test/.coverage
+fi