Compare commits

...

3 Commits

3 changed files with 38 additions and 21 deletions

View File

@ -178,6 +178,8 @@ elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
# setting PYTHON_TEST_EXTRA_OPTION
export PYTHON_TEST_EXTRA_OPTION="--xpu"
# use serial mode run unit tests for xpu
export PYTORCH_TEST_RUN_EVERYTHING_IN_SERIAL=1
fi
if [[ "$TEST_CONFIG" == *crossref* ]]; then

View File

@ -45,7 +45,7 @@ on:
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true
default: false
monitor-log-interval:
description: |
Set the interval for the monitor script to log utilization.
@ -167,6 +167,7 @@ jobs:
- name: Test
id: test
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
env:
BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
PR_NUMBER: ${{ github.event.pull_request.number }}
@ -180,26 +181,25 @@ jobs:
JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
BRANCH: ${{ steps.parse-ref.outputs.branch }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
PYTORCH_RETRY_TEST_CASES: 1
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
EXTRA_FLAGS: ${{ matrix.extra_flags || '' }}
OP_BENCHMARK_TESTS: ${{ matrix.op_benchmark_tests }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }}
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
TEST_CONFIG: ${{ matrix.config }}
SHARD_NUMBER: ${{ matrix.shard }}
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
DOCKER_IMAGE: ${{ inputs.docker-image }}
XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }}
timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
run: |
# Fetch aws credential from IMDs
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
set -x
TEST_COMMAND=.ci/pytorch/test.sh
@ -219,12 +219,11 @@ jobs:
-e GITHUB_RUN_NUMBER \
-e GITHUB_RUN_ATTEMPT \
-e JOB_ID \
-e JOB_NAME \
-e BASE_SHA \
-e BRANCH \
-e SHA1 \
-e AWS_DEFAULT_REGION \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_SESSION_TOKEN \
-e IN_WHEEL_TEST \
-e SHARD_NUMBER \
-e TEST_CONFIG \
@ -237,6 +236,7 @@ jobs:
-e TEST_SHOWLOCALS \
-e NO_TEST_TIMEOUT \
-e NO_TD \
-e TD_DISTRIBUTED \
-e MAX_JOBS="$(nproc --ignore=2)" \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
@ -266,6 +266,17 @@ jobs:
# jenkins user does not have write permission to mounted workspace; work-around by copying within container to jenkins home
docker exec -t "${container_name}" sh -c "cd .. && cp -R workspace pytorch && cd pytorch && pip install dist/*.whl && ${TEST_COMMAND}"
- name: Upload pytest cache if tests failed
uses: ./.github/actions/pytest-cache-upload
continue-on-error: true
if: failure() && steps.test.conclusion && steps.test.conclusion == 'failure' && inputs.build-environment != 'linux-s390x-binary-manywheel'
with:
cache_dir: .pytest_cache
shard: ${{ matrix.shard }}
sha: ${{ github.event.pull_request.head.sha || github.sha }}
test_config: ${{ matrix.config }}
job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
- name: Save test results
if: always()
run: |

View File

@ -59,14 +59,18 @@ jobs:
runner: linux.c7i.12xlarge
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 2, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 3, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 4, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 5, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 6, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 7, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 8, num_shards: 8, runner: "linux.idc.xpu" },
{ config: "default", shard: 1, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 2, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 3, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 4, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 5, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 6, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 7, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 8, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 9, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 10, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 11, num_shards: 12, runner: "linux.idc.xpu" },
{ config: "default", shard: 12, num_shards: 12, runner: "linux.idc.xpu" },
]}
secrets: inherit