Compare commits


1 Commit

Author SHA1 Message Date
9781f3142a tc 2025-10-15 10:58:45 -07:00
2 changed files with 411 additions and 411 deletions


@@ -9,7 +9,7 @@ install_ubuntu() {
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh`
apt-get install -y cargo
echo "Checking out sccache repo"
git clone https://github.com/mozilla/sccache -b v0.10.0
git clone https://github.com/mozilla/sccache -b v0.11.0
cd sccache
echo "Building sccache"
cargo build --release
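The only change in this file is the sccache pin bump from v0.10.0 to v0.11.0. As a minimal sanity-check sketch (assuming only that the v0.11.0 tag exists on mozilla/sccache), the pinned tag can be verified right after cloning:

    git clone https://github.com/mozilla/sccache -b v0.11.0
    git -C sccache describe --tags    # expect: v0.11.0 (detached HEAD at the tag)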


@@ -107,457 +107,457 @@ jobs:
All testing is done inside the container; to start an interactive session, run:
docker exec -it $(docker container ps --format '{{.ID}}') bash
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
with:
no-sudo: true
# - name: Checkout PyTorch
# uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
# with:
# no-sudo: true
- name: Setup Python
if: contains(matrix.runner, 'b200')
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.12'
cache: pip
# - name: Setup Python
# if: contains(matrix.runner, 'b200')
# uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
# with:
# python-version: '3.12'
# cache: pip
- name: Setup Linux
uses: ./.github/actions/setup-linux
if: inputs.build-environment != 'linux-s390x-binary-manywheel' && !contains(matrix.runner, 'b200')
# - name: Setup Linux
# uses: ./.github/actions/setup-linux
# if: inputs.build-environment != 'linux-s390x-binary-manywheel' && !contains(matrix.runner, 'b200')
- name: configure aws credentials
if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: ${{ inputs.aws-role-to-assume }}
role-session-name: gha-linux-test
aws-region: us-east-1
# - name: configure aws credentials
# if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
# uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
# with:
# role-to-assume: ${{ inputs.aws-role-to-assume }}
# role-session-name: gha-linux-test
# aws-region: us-east-1
- name: Login to Amazon ECR
if: ${{ inputs.aws-role-to-assume != '' && contains(matrix.runner, 'b200') }}
id: login-ecr
continue-on-error: true
uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
# - name: Login to Amazon ECR
# if: ${{ inputs.aws-role-to-assume != '' && contains(matrix.runner, 'b200') }}
# id: login-ecr
# continue-on-error: true
# uses: aws-actions/amazon-ecr-login@062b18b96a7aff071d4dc91bc00c4c1a7945b076 # v2.0.1
- name: Calculate docker image
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
docker-image-name: ${{ inputs.docker-image }}
# - name: Calculate docker image
# id: calculate-docker-image
# uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
# if: inputs.build-environment != 'linux-s390x-binary-manywheel'
# with:
# docker-image-name: ${{ inputs.docker-image }}
- name: Use the following to pull a public copy of the image
id: print-ghcr-mirror
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
env:
ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
shell: bash
run: |
tag=${ECR_DOCKER_IMAGE##*:}
echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
# - name: Use the following to pull a public copy of the image
# id: print-ghcr-mirror
# if: inputs.build-environment != 'linux-s390x-binary-manywheel'
# env:
# ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
# shell: bash
# run: |
# tag=${ECR_DOCKER_IMAGE##*:}
# echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"
- name: Pull docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
# - name: Pull docker image
# uses: pytorch/test-infra/.github/actions/pull-docker-image@main
# if: inputs.build-environment != 'linux-s390x-binary-manywheel'
# with:
# docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
- name: Check if in a container runner
shell: bash
id: check_container_runner
run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
# - name: Check if in a container runner
# shell: bash
# id: check_container_runner
# run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"
- name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
id: install-nvidia-driver
uses: pytorch/test-infra/.github/actions/setup-nvidia@main
with:
driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '580.82.07' }}
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
# - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
# id: install-nvidia-driver
# uses: pytorch/test-infra/.github/actions/setup-nvidia@main
# with:
# driver-version: ${{ matrix.config == 'legacy_nvidia_driver' && '525.105.17' || '580.82.07' }}
# if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && !contains(matrix.runner, 'b200') }}
- name: Setup GPU_FLAG for docker run
id: setup-gpu-flag
run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || contains(matrix.runner, 'b200')) }}
# - name: Setup GPU_FLAG for docker run
# id: setup-gpu-flag
# run: echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"
# if: ${{ contains(inputs.build-environment, 'cuda') && !contains(matrix.config, 'nogpu') && (steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' || contains(matrix.runner, 'b200')) }}
- name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
id: setup-sscache-port-flag
run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && !contains(matrix.runner, 'b200') }}
# - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
# id: setup-sscache-port-flag
# run: echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
# if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'true' && !contains(matrix.runner, 'b200') }}
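The arithmetic above gives each runner user its own sccache server port so that concurrent jobs on a shared container host don't collide. A minimal sketch with an illustrative UID:

    RUNNER_UID=1001
    echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))"
    # -> SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=5227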
- name: Lock NVIDIA A100 40GB Frequency
run: |
sudo nvidia-smi -pm 1
sudo nvidia-smi -ac 1215,1410
nvidia-smi
if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
# - name: Lock NVIDIA A100 40GB Frequency
# run: |
# sudo nvidia-smi -pm 1
# sudo nvidia-smi -ac 1215,1410
# nvidia-smi
# if: ${{ contains(matrix.runner, 'a100') && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
if: always()
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
# - name: Get workflow job id
# id: get-job-id
# uses: ./.github/actions/get-workflow-job-id
# if: always()
# with:
# github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
shell: bash
continue-on-error: true
env:
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
WORKFLOW_NAME: ${{ github.workflow }}
WORKFLOW_RUN_ID: ${{github.run_id}}
MONITOR_LOG_INTERVAL: ${{ inputs.monitor-log-interval }}
MONITOR_DATA_COLLECT_INTERVAL: ${{ inputs.monitor-data-collect-interval }}
run: |
python3 -m pip install psutil==5.9.8 dataclasses_json==0.6.7 nvidia-ml-py==11.525.84
python3 -m tools.stats.monitor --log-interval "$MONITOR_LOG_INTERVAL" --data-collect-interval "$MONITOR_DATA_COLLECT_INTERVAL" > usage_log.txt 2>&1 &
echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
# - name: Start monitoring script
# id: monitor-script
# if: ${{ !inputs.disable-monitor }}
# shell: bash
# continue-on-error: true
# env:
# JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
# JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
# WORKFLOW_NAME: ${{ github.workflow }}
# WORKFLOW_RUN_ID: ${{github.run_id}}
# MONITOR_LOG_INTERVAL: ${{ inputs.monitor-log-interval }}
# MONITOR_DATA_COLLECT_INTERVAL: ${{ inputs.monitor-data-collect-interval }}
# run: |
# python3 -m pip install psutil==5.9.8 dataclasses_json==0.6.7 nvidia-ml-py==11.525.84
# python3 -m tools.stats.monitor --log-interval "$MONITOR_LOG_INTERVAL" --data-collect-interval "$MONITOR_DATA_COLLECT_INTERVAL" > usage_log.txt 2>&1 &
# echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
- name: Download build artifacts
uses: ./.github/actions/download-build-artifacts
with:
name: ${{ inputs.build-environment }}
s3-bucket: ${{ inputs.s3-bucket }}
use-gha: ${{ inputs.use-gha }}
# - name: Download build artifacts
# uses: ./.github/actions/download-build-artifacts
# with:
# name: ${{ inputs.build-environment }}
# s3-bucket: ${{ inputs.s3-bucket }}
# use-gha: ${{ inputs.use-gha }}
- name: Download TD artifacts
continue-on-error: true
uses: ./.github/actions/download-td-artifacts
# - name: Download TD artifacts
# continue-on-error: true
# uses: ./.github/actions/download-td-artifacts
- name: Parse ref
id: parse-ref
run: .github/scripts/parse_ref.py
# - name: Parse ref
# id: parse-ref
# run: .github/scripts/parse_ref.py
- name: Check for keep-going label and re-enabled test issues
# This uses the filter-test-configs action because it conveniently
# checks for labels and re-enabled test issues. It does not actually do
# any filtering. All filtering is done in the build step.
id: keep-going
uses: ./.github/actions/filter-test-configs
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
test-matrix: ${{ inputs.test-matrix }}
job-name: ${{ steps.get-job-id.outputs.job-name }}
# - name: Check for keep-going label and re-enabled test issues
# # This uses the filter-test-configs action because it conveniently
# # checks for labels and re-enabled test issues. It does not actually do
# # any filtering. All filtering is done in the build step.
# id: keep-going
# uses: ./.github/actions/filter-test-configs
# with:
# github-token: ${{ secrets.GITHUB_TOKEN }}
# test-matrix: ${{ inputs.test-matrix }}
# job-name: ${{ steps.get-job-id.outputs.job-name }}
- name: Set Test step time
id: test-timeout
shell: bash
env:
JOB_TIMEOUT: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
run: |
echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
# - name: Set Test step time
# id: test-timeout
# shell: bash
# env:
# JOB_TIMEOUT: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
# run: |
# echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
- name: Preserve github env variables for use in docker
shell: bash
run: |
env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
# - name: Preserve github env variables for use in docker
# shell: bash
# run: |
# env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
# env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
- name: Test
id: test
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
env:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_WORKFLOW: ${{ github.workflow }}
GITHUB_JOB: ${{ github.job }}
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_RUN_NUMBER: ${{ github.run_number }}
GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
EXTRA_FLAGS: ${{ matrix.extra_flags || '' }}
OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
# Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
SCCACHE_BUCKET: ${{ !contains(matrix.runner, 'b200') && 'ossci-compiler-cache-circleci-v2' || '' }}
SCCACHE_REGION: ${{ !contains(matrix.runner, 'b200') && 'us-east-1' || '' }}
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
VLLM_TEST_HUGGING_FACE_TOKEN: ${{ secrets.VLLM_TEST_HUGGING_FACE_TOKEN }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
ARTIFACTS_FILE_SUFFIX: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
run: |
set -x
# - name: Test
# id: test
# timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
# env:
# BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
# PR_NUMBER: ${{ github.event.pull_request.number }}
# GITHUB_REPOSITORY: ${{ github.repository }}
# GITHUB_WORKFLOW: ${{ github.workflow }}
# GITHUB_JOB: ${{ github.job }}
# GITHUB_RUN_ID: ${{ github.run_id }}
# GITHUB_RUN_NUMBER: ${{ github.run_number }}
# GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
# JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
# JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
# BRANCH: ${{ steps.parse-ref.outputs.branch }}
# SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
# BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}
# TEST_CONFIG: ${{ matrix.config }}
# SHARD_NUMBER: ${{ matrix.shard }}
# NUM_TEST_SHARDS: ${{ matrix.num_shards }}
# EXTRA_FLAGS: ${{ matrix.extra_flags || '' }}
# OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }}
# REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
# CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
# VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
# TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
# NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
# NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
# TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
# # Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
# SCCACHE_BUCKET: ${{ !contains(matrix.runner, 'b200') && 'ossci-compiler-cache-circleci-v2' || '' }}
# SCCACHE_REGION: ${{ !contains(matrix.runner, 'b200') && 'us-east-1' || '' }}
# SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
# DOCKER_IMAGE: ${{ inputs.docker-image }}
# XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
# XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
# PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
# PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
# DASHBOARD_TAG: ${{ inputs.dashboard-tag }}
# VLLM_TEST_HUGGING_FACE_TOKEN: ${{ secrets.VLLM_TEST_HUGGING_FACE_TOKEN }}
# HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
# ARTIFACTS_FILE_SUFFIX: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
# run: |
# set -x
if [[ $TEST_CONFIG == 'multigpu' ]]; then
TEST_COMMAND=.ci/pytorch/multigpu-test.sh
elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
TEST_COMMAND=.ci/onnx/test.sh
else
TEST_COMMAND=.ci/pytorch/test.sh
fi
# if [[ $TEST_CONFIG == 'multigpu' ]]; then
# TEST_COMMAND=.ci/pytorch/multigpu-test.sh
# elif [[ $BUILD_ENVIRONMENT == *onnx* ]]; then
# TEST_COMMAND=.ci/onnx/test.sh
# else
# TEST_COMMAND=.ci/pytorch/test.sh
# fi
# Leaving 1GB for the runner and other things
TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
# https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
# comes from https://github.com/pytorch/test-infra/pull/6058
TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3))
# # Leaving 1GB for the runner and other things
# TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
# # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
# # comes from https://github.com/pytorch/test-infra/pull/6058
# TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3))
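The `${VAR%.*}` expansion above truncates awk's fractional output to an integer by dropping everything from the last dot onward, and the +3 adds the 3GB swap headroom. A standalone sketch with an illustrative value:

    TOTAL_AVAILABLE_MEMORY_IN_GB="30.123"
    echo "${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}"             # -> 30, used for --memory (GB)
    echo $(( ${TOTAL_AVAILABLE_MEMORY_IN_GB%.*} + 3 ))    # -> 33, used for --memory-swap (GB)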
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
SHM_OPTS=
JENKINS_USER=
# ensure that the docker container exits cleanly within 12 hours
# in case the cleanup action doesn't stop the container
# when the job is cancelled
DOCKER_SHELL_CMD="sleep 12h"
else
SHM_OPTS="--shm-size=${SHM_SIZE}"
JENKINS_USER="--user jenkins"
DOCKER_SHELL_CMD=
fi
# if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
# SHM_OPTS=
# JENKINS_USER=
# # ensure that the docker container exits cleanly within 12 hours
# # in case the cleanup action doesn't stop the container
# # when the job is cancelled
# DOCKER_SHELL_CMD="sleep 12h"
# else
# SHM_OPTS="--shm-size=${SHM_SIZE}"
# JENKINS_USER="--user jenkins"
# DOCKER_SHELL_CMD=
# fi
# detached container should get cleaned up by teardown_ec2_linux
# TODO: Stop building test binaries as part of the build phase
# Used for GPU_FLAG, SHM_OPTS, JENKINS_USER and DOCKER_SHELL_CMD since that doesn't play nice
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
-e BUILD_ENVIRONMENT \
-e PR_NUMBER \
-e GITHUB_ACTIONS \
-e GITHUB_REPOSITORY \
-e GITHUB_WORKFLOW \
-e GITHUB_JOB \
-e GITHUB_RUN_ID \
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e JOB_ID \
-e JOB_NAME \
-e BASE_SHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e TEST_CONFIG \
-e NUM_TEST_SHARDS \
-e REENABLED_ISSUES \
-e CONTINUE_THROUGH_ERROR \
-e VERBOSE_TEST_LOGS \
-e TEST_SHOWLOCALS \
-e NO_TEST_TIMEOUT \
-e NO_TD \
-e TD_DISTRIBUTED \
-e PR_LABELS \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e XLA_CUDA \
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
-e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
-e PYTORCH_TEST_RERUN_DISABLED_TESTS \
-e SKIP_SCCACHE_INITIALIZATION=1 \
-e HUGGING_FACE_HUB_TOKEN \
-e VLLM_TEST_HUGGING_FACE_TOKEN \
-e SCRIBE_GRAPHQL_ACCESS_TOKEN \
-e DASHBOARD_TAG \
-e ARTIFACTS_FILE_SUFFIX \
--memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
--memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
--cap-add=SYS_PTRACE \
--ipc=host \
${SHM_OPTS} \
--tty \
--detach \
--name="${container_name}" \
${JENKINS_USER} \
-v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
-w /var/lib/jenkins/workspace \
"${DOCKER_IMAGE}" \
${DOCKER_SHELL_CMD}
)
echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
# # detached container should get cleaned up by teardown_ec2_linux
# # TODO: Stop building test binaries as part of the build phase
# # Used for GPU_FLAG, SHM_OPTS, JENKINS_USER and DOCKER_SHELL_CMD since that doesn't play nice
# # shellcheck disable=SC2086,SC2090
# container_name=$(docker run \
# ${GPU_FLAG:-} \
# ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
# -e BUILD_ENVIRONMENT \
# -e PR_NUMBER \
# -e GITHUB_ACTIONS \
# -e GITHUB_REPOSITORY \
# -e GITHUB_WORKFLOW \
# -e GITHUB_JOB \
# -e GITHUB_RUN_ID \
# -e GITHUB_RUN_NUMBER \
# -e GITHUB_RUN_ATTEMPT \
# -e JOB_ID \
# -e JOB_NAME \
# -e BASE_SHA \
# -e BRANCH \
# -e SHA1 \
# -e AWS_DEFAULT_REGION \
# -e IN_WHEEL_TEST \
# -e SHARD_NUMBER \
# -e TEST_CONFIG \
# -e NUM_TEST_SHARDS \
# -e REENABLED_ISSUES \
# -e CONTINUE_THROUGH_ERROR \
# -e VERBOSE_TEST_LOGS \
# -e TEST_SHOWLOCALS \
# -e NO_TEST_TIMEOUT \
# -e NO_TD \
# -e TD_DISTRIBUTED \
# -e PR_LABELS \
# -e MAX_JOBS="$(nproc --ignore=2)" \
# -e SCCACHE_BUCKET \
# -e SCCACHE_REGION \
# -e XLA_CUDA \
# -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
# -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \
# -e PYTORCH_TEST_RERUN_DISABLED_TESTS \
# -e SKIP_SCCACHE_INITIALIZATION=1 \
# -e HUGGING_FACE_HUB_TOKEN \
# -e VLLM_TEST_HUGGING_FACE_TOKEN \
# -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
# -e DASHBOARD_TAG \
# -e ARTIFACTS_FILE_SUFFIX \
# --memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
# --memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
# --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
# --security-opt seccomp=unconfined \
# --cap-add=SYS_PTRACE \
# --ipc=host \
# ${SHM_OPTS} \
# --tty \
# --detach \
# --name="${container_name}" \
# ${JENKINS_USER} \
# -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
# -w /var/lib/jenkins/workspace \
# "${DOCKER_IMAGE}" \
# ${DOCKER_SHELL_CMD}
# )
# echo "DOCKER_CONTAINER_ID=${container_name}" >> "${GITHUB_ENV}"
if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
docker exec -t "${container_name}" sh -c "python3 -m pip install -r .ci/docker/requirements-ci.txt"
fi
# if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
# docker exec -t "${container_name}" sh -c "python3 -m pip install -r .ci/docker/requirements-ci.txt"
# fi
docker exec -t "${container_name}" sh -c "python3 -m pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"
# docker exec -t "${container_name}" sh -c "python3 -m pip install $(echo dist/*.whl)[opt-einsum] && ${TEST_COMMAND}"
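A note on the install line above: pip cannot expand a shell glob that has an extras suffix attached, so `$(echo dist/*.whl)` expands the glob to the built wheel's path first and `[opt-einsum]` is appended afterwards. A minimal sketch, with an illustrative wheel name:

    whl=$(echo dist/*.whl)                        # e.g. dist/torch-2.9.0-cp312-...whl (illustrative)
    python3 -m pip install "${whl}[opt-einsum]"   # install the wheel plus its opt-einsum extra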
- name: Upload pytest cache if tests failed
uses: ./.github/actions/pytest-cache-upload
continue-on-error: true
if: failure() && steps.test.conclusion && steps.test.conclusion == 'failure' && inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
cache_dir: .pytest_cache
shard: ${{ matrix.shard }}
sha: ${{ github.event.pull_request.head.sha || github.sha }}
test_config: ${{ matrix.config }}
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
# - name: Upload pytest cache if tests failed
# uses: ./.github/actions/pytest-cache-upload
# continue-on-error: true
# if: failure() && steps.test.conclusion && steps.test.conclusion == 'failure' && inputs.build-environment != 'linux-s390x-binary-manywheel'
# with:
# cache_dir: .pytest_cache
# shard: ${{ matrix.shard }}
# sha: ${{ github.event.pull_request.head.sha || github.sha }}
# test_config: ${{ matrix.config }}
# job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
- name: Authenticate with AWS
if: ${{ always() && contains(matrix.runner, 'b200') }}
uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The max duration enforced by the server side
role-duration-seconds: 18000
aws-region: us-east-1
# - name: Authenticate with AWS
# if: ${{ always() && contains(matrix.runner, 'b200') }}
# uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
# with:
# role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# # The max duration enforced by the server side
# role-duration-seconds: 18000
# aws-region: us-east-1
- name: Upload the benchmark results
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
if: inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
benchmark-results-dir: test/test-reports
dry-run: false
schema-version: v3
github-token: ${{ secrets.GITHUB_TOKEN }}
# - name: Upload the benchmark results
# uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
# if: inputs.build-environment != 'linux-s390x-binary-manywheel'
# with:
# benchmark-results-dir: test/test-reports
# dry-run: false
# schema-version: v3
# github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Print remaining test logs
shell: bash
if: always() && steps.test.conclusion
run: |
cat test/**/*_toprint.log || true
# - name: Print remaining test logs
# shell: bash
# if: always() && steps.test.conclusion
# run: |
# cat test/**/*_toprint.log || true
- name: Stop monitoring script
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
shell: bash
continue-on-error: true
env:
MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
run: |
kill "$MONITOR_SCRIPT_PID"
# - name: Stop monitoring script
# if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
# shell: bash
# continue-on-error: true
# env:
# MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
# run: |
# kill "$MONITOR_SCRIPT_PID"
- name: Upload test artifacts
uses: ./.github/actions/upload-test-artifacts
if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
with:
file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
use-gha: ${{ inputs.use-gha }}
s3-bucket: ${{ inputs.s3-bucket }}
# - name: Upload test artifacts
# uses: ./.github/actions/upload-test-artifacts
# if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
# with:
# file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
# use-gha: ${{ inputs.use-gha }}
# s3-bucket: ${{ inputs.s3-bucket }}
- name: Collect backtraces from coredumps (if any)
if: always()
run: |
# shellcheck disable=SC2156
find . -iname "core.[1-9]*" -exec docker exec "${DOCKER_CONTAINER_ID}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \;
# - name: Collect backtraces from coredumps (if any)
# if: always()
# run: |
# # shellcheck disable=SC2156
# find . -iname "core.[1-9]*" -exec docker exec "${DOCKER_CONTAINER_ID}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \;
- name: Store Core dumps on S3
uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
if: failure()
with:
name: coredumps-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}
retention-days: 14
if-no-files-found: ignore
path: ./**/core.[1-9]*
# - name: Store Core dumps on S3
# uses: seemethere/upload-artifact-s3@baba72d0712b404f646cebe0730933554ebce96a # v5.1.0
# if: failure()
# with:
# name: coredumps-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}
# retention-days: 14
# if-no-files-found: ignore
# path: ./**/core.[1-9]*
- name: Upload utilization stats
if: ${{ always() && steps.test.conclusion && steps.test.conclusion != 'skipped' && !inputs.disable-monitor && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
continue-on-error: true
uses: ./.github/actions/upload-utilization-stats
with:
job_id: ${{ steps.get-job-id.outputs.job-id }}
job_name: ${{ steps.get-job-id.outputs.job-name }}
workflow_name: ${{ github.workflow }}
workflow_run_id: ${{github.run_id}}
workflow_attempt: ${{github.run_attempt}}
# - name: Upload utilization stats
# if: ${{ always() && steps.test.conclusion && steps.test.conclusion != 'skipped' && !inputs.disable-monitor && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
# continue-on-error: true
# uses: ./.github/actions/upload-utilization-stats
# with:
# job_id: ${{ steps.get-job-id.outputs.job-id }}
# job_name: ${{ steps.get-job-id.outputs.job-name }}
# workflow_name: ${{ github.workflow }}
# workflow_run_id: ${{github.run_id}}
# workflow_attempt: ${{github.run_attempt}}
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@main
if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
# - name: Teardown Linux
# uses: pytorch/test-infra/.github/actions/teardown-linux@main
# if: always() && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false'
# NB: We are currently seeing an intermittent GPU-related issue on G5 runners with
# A10G GPUs. Once this happens, trying to reset the GPU as done in setup-nvidia does
# not seem to help. Here are some symptoms:
# * Calling nvidia-smi times out after 60 seconds
# * Running nvidia-smi fails with an "unable to determine the device handle for GPU"
# unknown error
# * Tests fail with a missing CUDA GPU error when initializing CUDA in PyTorch
# * Running docker --gpus all fails with an error response from the daemon
#
# As both the root cause and the recovery path are unclear, let's take the runner out of
# service so that it doesn't get any more jobs
- name: Check NVIDIA driver installation step
if: failure() && steps.install-nvidia-driver.outcome && steps.install-nvidia-driver.outcome != 'skipped'
shell: bash
run: |
set +e
set -x
# # NB: We are currently seeing an intermittent GPU-related issue on G5 runners with
# # A10G GPUs. Once this happens, trying to reset the GPU as done in setup-nvidia does
# # not seem to help. Here are some symptoms:
# # * Calling nvidia-smi times out after 60 seconds
# # * Running nvidia-smi fails with an "unable to determine the device handle for GPU"
# # unknown error
# # * Tests fail with a missing CUDA GPU error when initializing CUDA in PyTorch
# # * Running docker --gpus all fails with an error response from the daemon
# #
# # As both the root cause and the recovery path are unclear, let's take the runner out of
# # service so that it doesn't get any more jobs
# - name: Check NVIDIA driver installation step
# if: failure() && steps.install-nvidia-driver.outcome && steps.install-nvidia-driver.outcome != 'skipped'
# shell: bash
# run: |
# set +e
# set -x
nvidia-smi
# NB: Surprisingly, the nvidia-smi command returns successfully with return code 0 even
# when the driver has already crashed, as it can still get the driver version
# and some basic information like the bus ID. However, the rest of the information
# would be missing (ERR!), for example:
#
# +-----------------------------------------------------------------------------+
# | NVIDIA-SMI 525.89.02 Driver Version: 525.89.02 CUDA Version: 12.0 |
# |-------------------------------+----------------------+----------------------+
# | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
# | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
# | | | MIG M. |
# |===============================+======================+======================|
# | 0 ERR! Off | 00000000:00:1E.0 Off | ERR! |
# |ERR! ERR! ERR! ERR! / ERR! | 4184MiB / 23028MiB | ERR! Default |
# | | | ERR! |
# +-------------------------------+----------------------+----------------------+
#
# +-----------------------------------------------------------------------------+
# | Processes: |
# | GPU GI CI PID Type Process name GPU Memory |
# | ID ID Usage |
# |=============================================================================|
# +-----------------------------------------------------------------------------+
#
# This should be reported as a failure instead, as it is guaranteed to fail when
# Docker tries to run with --gpus all
#
# So, the correct check here is to query one of the missing pieces of info, like the
# GPU name, so that the command fails accordingly
nvidia-smi --query-gpu=gpu_name --format=csv,noheader --id=0
NVIDIA_SMI_STATUS=$?
# nvidia-smi
# # NB: Surprisingly, the nvidia-smi command returns successfully with return code 0 even
# # when the driver has already crashed, as it can still get the driver version
# # and some basic information like the bus ID. However, the rest of the information
# # would be missing (ERR!), for example:
# #
# # +-----------------------------------------------------------------------------+
# # | NVIDIA-SMI 525.89.02 Driver Version: 525.89.02 CUDA Version: 12.0 |
# # |-------------------------------+----------------------+----------------------+
# # | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
# # | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
# # | | | MIG M. |
# # |===============================+======================+======================|
# # | 0 ERR! Off | 00000000:00:1E.0 Off | ERR! |
# # |ERR! ERR! ERR! ERR! / ERR! | 4184MiB / 23028MiB | ERR! Default |
# # | | | ERR! |
# # +-------------------------------+----------------------+----------------------+
# #
# # +-----------------------------------------------------------------------------+
# # | Processes: |
# # | GPU GI CI PID Type Process name GPU Memory |
# # | ID ID Usage |
# # |=============================================================================|
# # +-----------------------------------------------------------------------------+
# #
# # This should be reported as a failure instead, as it is guaranteed to fail when
# # Docker tries to run with --gpus all
# #
# # So, the correct check here is to query one of the missing pieces of info, like the
# # GPU name, so that the command fails accordingly
# nvidia-smi --query-gpu=gpu_name --format=csv,noheader --id=0
# NVIDIA_SMI_STATUS=$?
# These are acceptable return codes from nvidia-smi, as copied from the setup-nvidia GitHub action
if [ "$NVIDIA_SMI_STATUS" -ne 0 ] && [ "$NVIDIA_SMI_STATUS" -ne 14 ]; then
echo "NVIDIA driver installation has failed, shutting down the runner..."
.github/scripts/stop_runner_service.sh
fi
# # These are acceptable return codes from nvidia-smi, as copied from the setup-nvidia GitHub action
# if [ "$NVIDIA_SMI_STATUS" -ne 0 ] && [ "$NVIDIA_SMI_STATUS" -ne 14 ]; then
# echo "NVIDIA driver installation has failed, shutting down the runner..."
# .github/scripts/stop_runner_service.sh
# fi
# For runners with multiple GPUs, we also want to confirm that the number of GPUs is a
# power of 2, i.e. 1, 2, 4, or 8. This is to avoid flaky test issues when one GPU fails
# https://github.com/pytorch/test-infra/issues/4000
GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
NVIDIA_SMI_STATUS=$?
# # For runners with multiple GPUs, we also want to confirm that the number of GPUs is a
# # power of 2, i.e. 1, 2, 4, or 8. This is to avoid flaky test issues when one GPU fails
# # https://github.com/pytorch/test-infra/issues/4000
# GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
# NVIDIA_SMI_STATUS=$?
# These are acceptable return codes from nvidia-smi, as copied from the setup-nvidia GitHub action
if [ "$NVIDIA_SMI_STATUS" -ne 0 ] && [ "$NVIDIA_SMI_STATUS" -ne 14 ]; then
echo "NVIDIA driver installation has failed, shutting down the runner..."
.github/scripts/stop_runner_service.sh
fi
# # These are acceptable return codes from nvidia-smi, as copied from the setup-nvidia GitHub action
# if [ "$NVIDIA_SMI_STATUS" -ne 0 ] && [ "$NVIDIA_SMI_STATUS" -ne 14 ]; then
# echo "NVIDIA driver installation has failed, shutting down the runner..."
# .github/scripts/stop_runner_service.sh
# fi
# Check that the GPU count is a power of 2
if [ "$GPU_COUNT" -le 8 ] && [ "$GPU_COUNT" -ne 1 ] && [ "$GPU_COUNT" -ne 2 ] && [ "$GPU_COUNT" -ne 4 ] && [ "$GPU_COUNT" -ne 8 ]; then
echo "NVIDIA driver detects $GPU_COUNT GPUs. The runner has a broken GPU, shutting it down..."
.github/scripts/stop_runner_service.sh
fi
# # Check that the GPU count is a power of 2
# if [ "$GPU_COUNT" -le 8 ] && [ "$GPU_COUNT" -ne 1 ] && [ "$GPU_COUNT" -ne 2 ] && [ "$GPU_COUNT" -ne 4 ] && [ "$GPU_COUNT" -ne 8 ]; then
# echo "NVIDIA driver detects $GPU_COUNT GPUs. The runner has a broken GPU, shutting it down..."
# .github/scripts/stop_runner_service.sh
# fi
- name: Cleanup docker
if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
shell: bash
run: |
# on s390x, stop the container so the worker stops cleanly
docker stop -a || true
docker kill -a || true
# - name: Cleanup docker
# if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
# shell: bash
# run: |
# # on s390x, stop the container so the worker stops cleanly
# docker stop -a || true
# docker kill -a || true