diff --git a/.circleci/config.yml b/.circleci/config.yml
index 96fb2399bc7b..663c38159ff9 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -409,12 +409,23 @@ jobs:
           else
             export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
           fi
+
+          retrieve_test_reports() {
+            echo "retrieving test reports"
+            docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
+          }
+          trap "retrieve_test_reports" ERR
+
           if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
             export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
           else
             export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
           fi
           echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+
+          retrieve_test_reports
+    - store_test_results:
+        path: test-reports
   caffe2_linux_build:
     <<: *caffe2_params
     machine:
@@ -1069,6 +1080,8 @@ jobs:
 
             chmod a+x .jenkins/pytorch/macos-test.sh
             unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
+      - store_test_results:
+          path: test/test-reports
 
   pytorch_macos_10_13_cuda9_2_cudnn7_py3_build:
     environment:
@@ -1299,7 +1312,7 @@ jobs:
           name: cert install
           no_output_timeout: "1h"
           command: |
-            set -e 
+            set -e
             PROJ_ROOT=/Users/distiller/project
             cd ${PROJ_ROOT}/ios/TestApp
             # install fastlane
@@ -1360,7 +1373,7 @@ jobs:
             if ! [ -x "$(command -v xcodebuild)" ]; then
               echo 'Error: xcodebuild is not installed.'
               exit 1
-            fi 
+            fi
             echo ${IOS_DEV_TEAM_ID}
             ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}
             if ! [ "$?" -eq "0" ]; then
diff --git a/.circleci/verbatim-sources/job-specs-custom.yml b/.circleci/verbatim-sources/job-specs-custom.yml
index 8ab0ba8720ca..dc242ca87bbf 100644
--- a/.circleci/verbatim-sources/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs-custom.yml
@@ -151,6 +151,8 @@
 
             chmod a+x .jenkins/pytorch/macos-test.sh
             unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
+      - store_test_results:
+          path: test/test-reports
 
   pytorch_macos_10_13_cuda9_2_cudnn7_py3_build:
     environment:
@@ -381,7 +383,7 @@
           name: cert install
           no_output_timeout: "1h"
           command: |
-            set -e 
+            set -e
             PROJ_ROOT=/Users/distiller/project
             cd ${PROJ_ROOT}/ios/TestApp
             # install fastlane
@@ -442,7 +444,7 @@
             if ! [ -x "$(command -v xcodebuild)" ]; then
               echo 'Error: xcodebuild is not installed.'
               exit 1
-            fi 
+            fi
             echo ${IOS_DEV_TEAM_ID}
             ruby ${PROJ_ROOT}/scripts/xcode_build.rb -i ${PROJ_ROOT}/build_ios/install -x ${PROJ_ROOT}/ios/TestApp/TestApp.xcodeproj -p ${IOS_PLATFORM} -c ${PROFILE} -t ${IOS_DEV_TEAM_ID}
             if ! [ "$?" -eq "0" ]; then
diff --git a/.circleci/verbatim-sources/pytorch-job-specs.yml b/.circleci/verbatim-sources/pytorch-job-specs.yml
index 6ba3fdd33301..11420b556fbc 100644
--- a/.circleci/verbatim-sources/pytorch-job-specs.yml
+++ b/.circleci/verbatim-sources/pytorch-job-specs.yml
@@ -121,9 +121,20 @@ jobs:
           else
             export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
           fi
+
+          retrieve_test_reports() {
+            echo "retrieving test reports"
+            docker cp $id:/var/lib/jenkins/workspace/test/test-reports ./ || echo 'No test reports found!'
+          }
+          trap "retrieve_test_reports" ERR
+
           if [[ ${BUILD_ENVIRONMENT} == *"multigpu"* ]]; then
             export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
           else
             export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "${PARALLEL_FLAGS}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
           fi
           echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
+
+          retrieve_test_reports
+    - store_test_results:
+        path: test-reports
diff --git a/.gitignore b/.gitignore
index 20d7c5993ca8..4892fe0138ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@ dropout_model.pt
 test/generated_type_hints_smoketest.py
 test/htmlcov
 test/cpp_extensions/install/
+test/test-reports/
 third_party/build/
 tools/shared/_utils_internal.py
 torch.egg-info/
diff --git a/.jenkins/pytorch/macos-test.sh b/.jenkins/pytorch/macos-test.sh
index 518ac11d7652..3df0c9a1bd41 100755
--- a/.jenkins/pytorch/macos-test.sh
+++ b/.jenkins/pytorch/macos-test.sh
@@ -6,6 +6,9 @@ source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
 conda install -y six
 pip install -q hypothesis "librosa>=0.6.2" psutil
 
+# TODO: move this to Docker
+pip install unittest-xml-reporting
+
 # faulthandler become built-in since 3.3
 if [[ ! $(python -c "import sys; print(int(sys.version_info >= (3, 3)))") == "1" ]]; then
   pip install -q faulthandler
diff --git a/.jenkins/pytorch/multigpu-test.sh b/.jenkins/pytorch/multigpu-test.sh
index 1b1604af24c6..74cd7730b6e2 100755
--- a/.jenkins/pytorch/multigpu-test.sh
+++ b/.jenkins/pytorch/multigpu-test.sh
@@ -10,8 +10,10 @@ COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}"
 source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
 
 echo "Testing pytorch (distributed only)"
-
 if [ -n "${IN_CIRCLECI}" ]; then
+  # TODO: move this to Docker
+  pip_install unittest-xml-reporting
+
   if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then
     # TODO: move this to Docker
     sudo apt-get update
diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 8df55e36c464..27f4e4c843a5 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -12,6 +12,9 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
 echo "Testing pytorch"
 
 if [ -n "${IN_CIRCLECI}" ]; then
+  # TODO: move this to Docker
+  pip_install unittest-xml-reporting
+
   if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then
     # TODO: move this to Docker
     sudo apt-get -qq update
diff --git a/test/common_utils.py b/test/common_utils.py
index c009ac31cf77..1defa27118ac 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -155,6 +155,13 @@ def repeat_test_for_types(dtypes):
         return call_helper
     return repeat_helper
 
+# Environment variable `IS_PYTORCH_CI` is set in `.jenkins/common.sh`.
+IS_PYTORCH_CI = bool(os.environ.get('IS_PYTORCH_CI'))
+IN_CIRCLECI = bool(os.environ.get('IN_CIRCLECI'))
+TEST_REPORT_SOURCE_OVERRIDE = os.environ.get('TEST_REPORT_SOURCE_OVERRIDE')
+
+PY3 = sys.version_info > (3, 0)
+PY34 = sys.version_info >= (3, 4)
 
 def run_tests(argv=UNITTEST_ARGS):
     if TEST_IN_SUBPROCESS:
@@ -179,18 +186,32 @@ def run_tests(argv=UNITTEST_ARGS):
         assert len(failed_tests) == 0, "{} unit test(s) failed:\n\t{}".format(
             len(failed_tests), '\n\t'.join(failed_tests))
     else:
-        unittest.main(argv=argv)
+        if IN_CIRCLECI:
+            # import here so that non-CI doesn't need xmlrunner installed
+            import xmlrunner
+            # allow users to override the test report subdirectory. We need this
+            # because the distributed tests run the same test file multiple
+            # times with different configurations.
+            if TEST_REPORT_SOURCE_OVERRIDE is not None:
+                test_source = TEST_REPORT_SOURCE_OVERRIDE
+            else:
+                test_source = 'python-unittest'
 
-PY3 = sys.version_info > (3, 0)
-PY34 = sys.version_info >= (3, 4)
+            test_report_path = os.path.join('test-reports', test_source)
+            if PY3:
+                os.makedirs(test_report_path, exist_ok=True)
+            else:
+                if not os.path.exists(test_report_path):
+                    os.makedirs(test_report_path)
+
+            unittest.main(argv=argv, testRunner=xmlrunner.XMLTestRunner(output=test_report_path))
+        else:
+            unittest.main(argv=argv)
 
 IS_WINDOWS = sys.platform == "win32"
 IS_MACOS = sys.platform == "darwin"
 IS_PPC = platform.machine() == "ppc64le"
 
-# Environment variable `IS_PYTORCH_CI` is set in `.jenkins/common.sh`.
-IS_PYTORCH_CI = bool(os.environ.get('IS_PYTORCH_CI', 0))
-
 if IS_WINDOWS:
     @contextmanager
     def TemporaryFileName():
diff --git a/test/run_test.py b/test/run_test.py
index 97485350e8a3..e06673ead151 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -102,15 +102,18 @@ DISTRIBUTED_TESTS_CONFIG = {}
 if dist.is_available():
     if dist.is_mpi_available():
         DISTRIBUTED_TESTS_CONFIG['mpi'] = {
-            'WORLD_SIZE': '3'
+            'WORLD_SIZE': '3',
+            'TEST_REPORT_SOURCE_OVERRIDE': 'dist-mpi'
         }
     if dist.is_nccl_available():
         DISTRIBUTED_TESTS_CONFIG['nccl'] = {
-            'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
+            'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3',
+            'TEST_REPORT_SOURCE_OVERRIDE': 'dist-nccl'
         }
     if dist.is_gloo_available():
         DISTRIBUTED_TESTS_CONFIG['gloo'] = {
-            'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3'
+            'WORLD_SIZE': '2' if torch.cuda.device_count() == 2 else '3',
+            'TEST_REPORT_SOURCE_OVERRIDE': 'dist-gloo'
         }
 
 # https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
diff --git a/test/test_dataloader.py b/test/test_dataloader.py
index 2843f8bf6767..6663a086f507 100644
--- a/test/test_dataloader.py
+++ b/test/test_dataloader.py
@@ -283,11 +283,11 @@ class TestConcatDataset(TestCase):
 # takes in dummy var so this can also be used as a `worker_init_fn`
 def set_faulthander_if_available(_=None):
     if HAS_FAULTHANDLER:
-        faulthandler.enable()
+        faulthandler.enable(sys.__stderr__)
         if not IS_WINDOWS:
             # windows does not have faulthandler.register
             # chain=False prevents the default behavior of killing the process
-            faulthandler.register(signal.SIGUSR1, chain=False)
+            faulthandler.register(signal.SIGUSR1, file=sys.__stderr__, chain=False)
 
 
 set_faulthander_if_available()