mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
test reporting (#29658)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/29658 This PR makes our test scripts output artifacts that CircleCI can understand. This has a few benefits: 1. We can actually see failed tests and their output in the job screen (instead of having to scroll through logs) 2. We can use the CircleCI test metadata API to track failed tests programmatically. It looks like this (old ui): https://circleci.com/gh/pytorch/pytorch/3546584?pipelines-ui-opt-out or this (new ui): https://app.circleci.com/jobs/github/pytorch/pytorch/3546584/tests Test Plan: Imported from OSS Differential Revision: D18597261 Pulled By: suo fbshipit-source-id: 07fc7d26bbb834e13cc4cc0e48178645ae6579f5
This commit is contained in:
committed by
Facebook Github Bot
parent
1dbc84ab6d
commit
4b0a6d299c
@ -409,12 +409,23 @@ jobs:
|
||||
else
|
||||
export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
fi
|
||||
|
||||
retrieve_test_reports() {
|
||||
echo "retrieving test reports"
|
||||
docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
|
||||
}
|
||||
trap "retrieve_test_reports" ERR
|
||||
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
|
||||
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
|
||||
else
|
||||
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
|
||||
fi
|
||||
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
|
||||
|
||||
retrieve_test_reports
|
||||
- store_test_results:
|
||||
path: test-reports
|
||||
caffe2_linux_build:
|
||||
<<: *caffe2_params
|
||||
machine:
|
||||
@ -1069,6 +1080,8 @@ jobs:
|
||||
|
||||
chmod a+x .jenkins/pytorch/macos-test.sh
|
||||
unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
|
||||
- store_test_results:
|
||||
path: test/test-reports
|
||||
|
||||
pytorch_macos_10_13_cuda9_2_cudnn7_py3_build:
|
||||
environment:
|
||||
@ -1299,7 +1312,7 @@ jobs:
|
||||
name: cert install
|
||||
no_output_timeout: "1h"
|
||||
command: |
|
||||
set -e
|
||||
set -e
|
||||
PROJ_ROOT=/Users/distiller/project
|
||||
cd ${PROJ_ROOT}/ios/TestApp
|
||||
# install fastlane
|
||||
@ -1360,7 +1373,7 @@ jobs:
|
||||
if ! [ -x "$(command -v xcodebuild)" ]; then
|
||||
echo 'Error: xcodebuild is not installed.'
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
echo ${IOS_DEV_TEAM_ID}
|
||||
ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}
|
||||
if ! [ "$?" -eq "0" ]; then
|
||||
|
@ -151,6 +151,8 @@
|
||||
|
||||
chmod a+x .jenkins/pytorch/macos-test.sh
|
||||
unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
|
||||
- store_test_results:
|
||||
path: test/test-reports
|
||||
|
||||
pytorch_macos_10_13_cuda9_2_cudnn7_py3_build:
|
||||
environment:
|
||||
@ -381,7 +383,7 @@
|
||||
name: cert install
|
||||
no_output_timeout: "1h"
|
||||
command: |
|
||||
set -e
|
||||
set -e
|
||||
PROJ_ROOT=/Users/distiller/project
|
||||
cd ${PROJ_ROOT}/ios/TestApp
|
||||
# install fastlane
|
||||
@ -442,7 +444,7 @@
|
||||
if ! [ -x "$(command -v xcodebuild)" ]; then
|
||||
echo 'Error: xcodebuild is not installed.'
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
echo ${IOS_DEV_TEAM_ID}
|
||||
ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}
|
||||
if ! [ "$?" -eq "0" ]; then
|
||||
|
@ -121,9 +121,20 @@ jobs:
|
||||
else
|
||||
export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
|
||||
fi
|
||||
|
||||
retrieve_test_reports() {
|
||||
echo "retrieving test reports"
|
||||
docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
|
||||
}
|
||||
trap "retrieve_test_reports" ERR
|
||||
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
|
||||
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
|
||||
else
|
||||
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
|
||||
fi
|
||||
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
|
||||
|
||||
retrieve_test_reports
|
||||
- store_test_results:
|
||||
path: test-reports
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -42,6 +42,7 @@ dropout_model.pt
|
||||
test/generated_type_hints_smoketest.py
|
||||
test/htmlcov
|
||||
test/cpp_extensions/install/
|
||||
test/test-reports/
|
||||
third_party/build/
|
||||
tools/shared/_utils_internal.py
|
||||
torch.egg-info/
|
||||
|
@ -6,6 +6,9 @@ source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
|
||||
conda install -y six
|
||||
pip install -q hypothesis "librosa>=0.6.2" psutil
|
||||
|
||||
# TODO move this to docker
|
||||
pip install unittest-xml-reporting
|
||||
|
||||
# faulthandler has been built-in since Python 3.3
|
||||
if [[ ! $(python -c "import sys; print(int(sys.version_info >= (3, 3)))") == "1" ]]; then
|
||||
pip install -q faulthandler
|
||||
|
@ -10,8 +10,10 @@ COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
echo "Testing pytorch (distributed only)"
|
||||
|
||||
if [ -n "${IN_CIRCLECI}" ]; then
|
||||
# TODO move this to docker
|
||||
pip_install unittest-xml-reporting
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get update
|
||||
|
@ -12,6 +12,9 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
echo "Testing pytorch"
|
||||
|
||||
if [ -n "${IN_CIRCLECI}" ]; then
|
||||
# TODO move this to docker
|
||||
pip_install unittest-xml-reporting
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get -qq update
|
||||
|
@ -155,6 +155,13 @@ def repeat_test_for_types(dtypes):
|
||||
return call_helper
|
||||
return repeat_helper
|
||||
|
||||
# Environment variable `IS_PYTORCH_CI` is set in `.jenkins/common.sh`.
|
||||
IS_PYTORCH_CI = bool(os.environ.get('IS_PYTORCH_CI'))
|
||||
IN_CIRCLECI = bool(os.environ.get('IN_CIRCLECI'))
|
||||
TEST_REPORT_SOURCE_OVERRIDE = os.environ.get('TEST_REPORT_SOURCE_OVERRIDE')
|
||||
|
||||
PY3 = sys.version_info > (3, 0)
|
||||
PY34 = sys.version_info >= (3, 4)
|
||||
|
||||
def run_tests(argv=UNITTEST_ARGS):
|
||||
if TEST_IN_SUBPROCESS:
|
||||
@ -179,18 +186,32 @@ def run_tests(argv=UNITTEST_ARGS):
|
||||
assert len(failed_tests) == 0, "{} unit test(s) failed:\n\t{}".format(
|
||||
len(failed_tests), '\n\t'.join(failed_tests))
|
||||
else:
|
||||
unittest.main(argv=argv)
|
||||
if IN_CIRCLECI:
|
||||
# import here so that non-CI doesn't need xmlrunner installed
|
||||
import xmlrunner
|
||||
# allow users to override the test file location. We need this
|
||||
# because the distributed tests run the same test file multiple
|
||||
# times with different configurations.
|
||||
if TEST_REPORT_SOURCE_OVERRIDE is not None:
|
||||
test_source = TEST_REPORT_SOURCE_OVERRIDE
|
||||
else:
|
||||
test_source = 'python-unittest'
|
||||
|
||||
PY3 = sys.version_info > (3, 0)
|
||||
PY34 = sys.version_info >= (3, 4)
|
||||
test_report_path = os.path.join('test-reports', test_source)
|
||||
if PY3:
|
||||
os.makedirs(test_report_path, exist_ok=True)
|
||||
else:
|
||||
if not os.path.exists(test_report_path):
|
||||
os.makedirs(test_report_path)
|
||||
|
||||
unittest.main(argv=argv, testRunner=xmlrunner.XMLTestRunner(output=test_report_path))
|
||||
else:
|
||||
unittest.main(argv=argv)
|
||||
|
||||
IS_WINDOWS = sys.platform == "win32"
|
||||
IS_MACOS = sys.platform == "darwin"
|
||||
IS_PPC = platform.machine() == "ppc64le"
|
||||
|
||||
# Environment variable `IS_PYTORCH_CI` is set in `.jenkins/common.sh`.
|
||||
IS_PYTORCH_CI = bool(os.environ.get('IS_PYTORCH_CI', 0))
|
||||
|
||||
if IS_WINDOWS:
|
||||
@contextmanager
|
||||
def TemporaryFileName():
|
||||
|
@ -102,15 +102,18 @@ DISTRIBUTED_TESTS_CONFIG = {}
|
||||
if dist.is_available():
|
||||
if dist.is_mpi_available():
|
||||
DISTRIBUTED_TESTS_CONFIG['mpi'] = {
|
||||
'WORLD_SIZE': '3'
|
||||
'WORLD_SIZE': '3',
|
||||
'TEST_REPORT_SOURCE_OVERRIDE': 'dist-mpi'
|
||||
}
|
||||
if dist.is_nccl_available():
|
||||
DISTRIBUTED_TESTS_CONFIG['nccl'] = {
|
||||
'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
|
||||
'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3',
|
||||
'TEST_REPORT_SOURCE_OVERRIDE': 'dist-nccl'
|
||||
}
|
||||
if dist.is_gloo_available():
|
||||
DISTRIBUTED_TESTS_CONFIG['gloo'] = {
|
||||
'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
|
||||
'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3',
|
||||
'TEST_REPORT_SOURCE_OVERRIDE': 'dist-gloo'
|
||||
}
|
||||
|
||||
# https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
|
||||
|
@ -283,11 +283,11 @@ class TestConcatDataset(TestCase):
|
||||
# takes in dummy var so this can also be used as a `worker_init_fn`
|
||||
def set_faulthander_if_available(_=None):
|
||||
if HAS_FAULTHANDLER:
|
||||
faulthandler.enable()
|
||||
faulthandler.enable(sys.__stderr__)
|
||||
if not IS_WINDOWS:
|
||||
# windows does not have faulthandler.register
|
||||
# chain=False prevents the default behavior of killing the process
|
||||
faulthandler.register(signal.SIGUSR1, chain=False)
|
||||
faulthandler.register(signal.SIGUSR1, file=sys.__stderr__, chain=False)
|
||||
|
||||
|
||||
set_faulthander_if_available()
|
||||
|
Reference in New Issue
Block a user