Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Remove print_test_stats, test_history, s3_stat_parser (#92841)
Pritam Damania no longer uses it (and is no longer with FB), and I don't know who else has an interest in it.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/92841
Approved by: https://github.com/malfet, https://github.com/huydhn, https://github.com/ZainRizvi, https://github.com/seemethere
Committed by: PyTorch MergeBot
Parent: 975feb606e
Commit: 27ab1dfc28
@@ -480,7 +480,7 @@ test_libtorch() {
 
   # Make test_reports directory
   # NB: the ending test_libtorch must match the current function name for the current
-  # test reporting process (in print_test_stats.py) to function as expected.
+  # test reporting process to function as expected.
   TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_libtorch
   mkdir -p $TEST_REPORTS_DIR
 
@@ -528,7 +528,7 @@ test_aot_compilation() {
 
   # Make test_reports directory
   # NB: the ending test_libtorch must match the current function name for the current
-  # test reporting process (in print_test_stats.py) to function as expected.
+  # test reporting process to function as expected.
   TEST_REPORTS_DIR=test/test-reports/cpp-unittest/test_aot_compilation
   mkdir -p $TEST_REPORTS_DIR
   if [ -f "$TORCH_BIN_DIR"/test_mobile_nnc ]; then "$TORCH_BIN_DIR"/test_mobile_nnc --gtest_output=xml:$TEST_REPORTS_DIR/test_mobile_nnc.xml; fi
@@ -542,7 +542,7 @@ test_vulkan() {
   ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_TEST_DIR"
   export VK_ICD_FILENAMES=/var/lib/jenkins/swiftshader/swiftshader/build/Linux/vk_swiftshader_icd.json
   # NB: the ending test_vulkan must match the current function name for the current
-  # test reporting process (in print_test_stats.py) to function as expected.
+  # test reporting process to function as expected.
   TEST_REPORTS_DIR=test/test-reports/cpp-vulkan/test_vulkan
   mkdir -p $TEST_REPORTS_DIR
   LD_LIBRARY_PATH=/var/lib/jenkins/swiftshader/swiftshader/build/Linux/ "$TORCH_TEST_DIR"/vulkan_api_test --gtest_output=xml:$TEST_REPORTS_DIR/vulkan_test.xml
@@ -559,7 +559,7 @@ test_distributed() {
   ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
   ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
   # NB: the ending test_distributed must match the current function name for the current
-  # test reporting process (in print_test_stats.py) to function as expected.
+  # test reporting process to function as expected.
   TEST_REPORTS_DIR=test/test-reports/cpp-distributed/test_distributed
   mkdir -p $TEST_REPORTS_DIR
   "$TORCH_BIN_DIR"/FileStoreTest --gtest_output=xml:$TEST_REPORTS_DIR/FileStoreTest.xml
@@ -583,7 +583,7 @@ test_rpc() {
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     echo "Testing RPC C++ tests"
     # NB: the ending test_rpc must match the current function name for the current
-    # test reporting process (in print_test_stats.py) to function as expected.
+    # test reporting process to function as expected.
     ln -sf "$TORCH_LIB_DIR"/libtorch* "$TORCH_BIN_DIR"
     ln -sf "$TORCH_LIB_DIR"/libc10* "$TORCH_BIN_DIR"
     ln -sf "$TORCH_LIB_DIR"/libtbb* "$TORCH_BIN_DIR"
.circleci/config.yml (generated): 15 lines
@@ -781,21 +781,6 @@ jobs:
 
             chmod a+x .jenkins/pytorch/macos-test.sh
             unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
-      - run:
-          name: Report results
-          no_output_timeout: "5m"
-          command: |
-            set -ex
-            source /Users/distiller/workspace/miniconda3/bin/activate
-            python3 -m pip install boto3==1.19.12
-
-            export JOB_BASE_NAME=$CIRCLE_JOB
-
-            # Using the same IAM user to write stats to our OSS bucket
-            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
-          when: always
       - store_test_results:
           path: test/test-reports
 
@@ -306,21 +306,6 @@
 
             chmod a+x .jenkins/pytorch/macos-test.sh
             unbuffer .jenkins/pytorch/macos-test.sh 2>&1 | ts
-      - run:
-          name: Report results
-          no_output_timeout: "5m"
-          command: |
-            set -ex
-            source /Users/distiller/workspace/miniconda3/bin/activate
-            python3 -m pip install boto3==1.19.12
-
-            export JOB_BASE_NAME=$CIRCLE_JOB
-
-            # Using the same IAM user to write stats to our OSS bucket
-            export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4}
-            python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
-          when: always
       - store_test_results:
           path: test/test-reports
 
.github/workflows/_bazel-build-test.yml (vendored): 23 lines
@@ -180,29 +180,6 @@ jobs:
         with:
           file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}
-
-      - name: Upload test statistics
-        if: always()
-        continue-on-error: true
-        env:
-          AWS_DEFAULT_REGION: us-east-1
-          GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: ${{ github.run_id }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GHA_WORKFLOW_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-        shell: bash
-        run: |
-          set -x
-          python3 -m pip install -r requirements.txt
-          python3 -m pip install boto3==1.19.12
-          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
 
       - name: Teardown Linux
         uses: pytorch/test-infra/.github/actions/teardown-linux@main
         if: always()
.github/workflows/_linux-test.yml (vendored): 25 lines
@@ -260,31 +260,6 @@ jobs:
           if-no-files-found: ignore
           path: ./**/core.[1-9]*
-
-      - name: Upload test statistics
-        if: always()
-        continue-on-error: true
-        env:
-          AWS_DEFAULT_REGION: us-east-1
-          GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          TEST_CONFIG: ${{ matrix.config }}
-          SHARD_NUMBER: ${{ matrix.shard }}
-          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: ${{ github.run_id }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GHA_WORKFLOW_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-        shell: bash
-        run: |
-          set -x
-          python3 -m pip install -r requirements.txt
-          python3 -m pip install boto3==1.19.12
-          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
 
       - name: Teardown Linux
         uses: pytorch/test-infra/.github/actions/teardown-linux@main
         if: always()
.github/workflows/_mac-test.yml (vendored): 24 lines
@@ -192,27 +192,3 @@ jobs:
         with:
           use-gha: true
           file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
-
-      - name: Upload test statistics
-        if: always()
-        continue-on-error: true
-        env:
-          AWS_DEFAULT_REGION: us-east-1
-          GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          TEST_CONFIG: ${{ matrix.config }}
-          SHARD_NUMBER: ${{ matrix.shard }}
-          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: ${{ github.run_id }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}
-          GHA_WORKFLOW_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-        run: |
-          set -x
-          ${CONDA_RUN} python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
.github/workflows/_rocm-test.yml (vendored): 27 lines
@@ -238,32 +238,5 @@ jobs:
           use-gha: true
           file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
-
-      - name: Upload test statistics
-        if: always()
-        continue-on-error: true
-        env:
-          AWS_DEFAULT_REGION: us-east-1
-          GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          TEST_CONFIG: ${{ matrix.config }}
-          SHARD_NUMBER: ${{ matrix.shard }}
-          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: ${{ github.run_id }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_OSSCI_METRICS_V2_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_OSSCI_METRICS_V2_SECRET_ACCESS_KEY }}
-          GHA_WORKFLOW_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-        shell: bash
-        run: |
-          set -x
-          python3 -m pip install -r requirements.txt
-          python3 -m pip install boto3==1.19.12
-          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
 
       - name: Teardown ROCm
         uses: ./.github/actions/teardown-rocm
.github/workflows/_win-test.yml (vendored): 24 lines
@@ -185,30 +185,6 @@ jobs:
         id: parse-ref
         run: .github/scripts/parse_ref.py
-
-      - name: Upload test statistics
-        if: always()
-        continue-on-error: true
-        env:
-          AWS_DEFAULT_REGION: us-east-1
-          GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-          BRANCH: ${{ steps.parse-ref.outputs.branch }}
-          TEST_CONFIG: ${{ matrix.config }}
-          SHARD_NUMBER: ${{ matrix.shard }}
-          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          PYTORCH_RETRY_TEST_CASES: 1
-          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
-          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
-          TAG: ${{ steps.parse-ref.outputs.tag }}
-          WORKFLOW_ID: ${{ github.run_id }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          GHA_WORKFLOW_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
-        shell: bash
-        run: |
-          set -x
-          # Windows conda doesn't have python3 binary, only python, but it's python3
-          ${CONDA_RUN} python -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test
 
       - name: Teardown Windows
         uses: ./.github/actions/teardown-win
         if: always()
mypy.ini: 3 lines
@@ -124,9 +124,6 @@ warn_unused_ignores = False
 [mypy-tools.generate_torch_version]
 warn_unused_ignores = False
 
-[mypy-tools.stats.s3_stat_parser]
-warn_unused_ignores = False
-
 #
 # Adding type annotations to caffe2 is probably not worth the effort
 # only work on this if you have a specific reason for it, otherwise
@@ -39,8 +39,6 @@ Developer tools which you might find useful:
   can conveniently run diffs on them when working on code-generation.
   (See also [generated_dirs.txt](generated_dirs.txt) which
   specifies the list of directories with generated files.)
-* [stats/test_history.py](stats/test_history.py) - Query S3 to display history of a single
-  test across multiple jobs over time.
 
 Important if you want to run on AMD GPU:
 
File diff suppressed because it is too large
tools/stats/s3_stat_parser.py (deleted): 244 lines
@@ -1,244 +0,0 @@
import bz2
import json
import logging
import subprocess
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Any, cast, Dict, List, Optional, Tuple, Union

from typing_extensions import Literal, TypedDict

try:
    import boto3  # type: ignore[import]
    import botocore  # type: ignore[import]

    HAVE_BOTO3 = True
except ImportError:
    HAVE_BOTO3 = False


logger = logging.getLogger(__name__)


OSSCI_METRICS_BUCKET = "ossci-metrics"

Commit = str  # 40-digit SHA-1 hex string
Status = Optional[Literal["errored", "failed", "skipped"]]


class CaseMeta(TypedDict):
    seconds: float


class Version1Case(CaseMeta):
    name: str
    errored: bool
    failed: bool
    skipped: bool


class Version1Suite(TypedDict):
    total_seconds: float
    cases: List[Version1Case]


class ReportMetaMeta(TypedDict):
    build_pr: str
    build_tag: str
    build_sha1: Commit
    build_base_commit: Commit
    build_branch: str
    build_job: str
    build_workflow_id: str
    build_start_time_epoch: str


class ReportMeta(ReportMetaMeta):
    total_seconds: float


class Version1Report(ReportMeta):
    suites: Dict[str, Version1Suite]


class Version2Case(CaseMeta):
    status: Status


class Version2Suite(TypedDict):
    total_seconds: float
    cases: Dict[str, Version2Case]


class Version2File(TypedDict):
    total_seconds: float
    suites: Dict[str, Version2Suite]


class VersionedReport(ReportMeta):
    format_version: int


# report: Version2Report implies report['format_version'] == 2
class Version2Report(VersionedReport):
    files: Dict[str, Version2File]


Report = Union[Version1Report, VersionedReport]

if HAVE_BOTO3:
    S3_RESOURCE_READ_ONLY = boto3.resource(
        "s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED)
    )
    S3_RESOURCE = boto3.resource("s3")


def get_S3_bucket_readonly(bucket_name: str) -> Any:
    return S3_RESOURCE_READ_ONLY.Bucket(bucket_name)


def get_S3_object_from_bucket(bucket_name: str, object: str) -> Any:
    return S3_RESOURCE.Object(bucket_name, object)


def case_status(case: Version1Case) -> Status:
    for k in {"errored", "failed", "skipped"}:
        if case[k]:  # type: ignore[literal-required]
            return cast(Status, k)
    return None


def newify_case(case: Version1Case) -> Version2Case:
    return {
        "seconds": case["seconds"],
        "status": case_status(case),
    }


def get_cases(
    *,
    data: Report,
    filename: Optional[str],
    suite_name: Optional[str],
    test_name: Optional[str],
) -> List[Version2Case]:
    cases: List[Version2Case] = []
    if "format_version" not in data:  # version 1 implicitly
        v1report = cast(Version1Report, data)
        suites = v1report["suites"]
        for sname, v1suite in suites.items():
            if not suite_name or sname == suite_name:
                for v1case in v1suite["cases"]:
                    if not test_name or v1case["name"] == test_name:
                        cases.append(newify_case(v1case))
    else:
        v_report = cast(VersionedReport, data)
        version = v_report["format_version"]
        if version == 2:
            v2report = cast(Version2Report, v_report)
            for fname, v2file in v2report["files"].items():
                if fname == filename or not filename:
                    for sname, v2suite in v2file["suites"].items():
                        if sname == suite_name or not suite_name:
                            for cname, v2case in v2suite["cases"].items():
                                if not test_name or cname == test_name:
                                    cases.append(v2case)
        else:
            raise RuntimeError(f"Unknown format version: {version}")
    return cases


def _parse_master_summaries(summaries: Any, jobs: List[str]) -> Dict[str, List[Report]]:
    summary_dict = defaultdict(list)
    for summary in summaries:
        # master summary format: "test_time/{sha}/{job}/file"
        summary_job = summary.key.split("/")[2]
        if summary_job in jobs or len(jobs) == 0:
            binary = summary.get()["Body"].read()
            string = bz2.decompress(binary).decode("utf-8")
            summary_dict[summary_job].append(json.loads(string))
    return summary_dict


def _parse_pr_summaries(
    summaries: Any, job_prefix: str
) -> Dict[str, List[Tuple[Report, str]]]:
    summary_dict = defaultdict(list)
    for summary in summaries:
        # PR summary format: "pr_test_time/{pr}/{sha}/{job}/file"
        summary_job = summary.key.split("/")[3]
        summary_timestamp = summary.key.split("/")[4][: len("YYYY-MM-ddTHH:mm:ss")]
        if not job_prefix or len(job_prefix) == 0 or summary_job.startswith(job_prefix):
            binary = summary.get()["Body"].read()
            string = bz2.decompress(binary).decode("utf-8")
            summary_dict[summary_job].append((json.loads(string), summary_timestamp))
    return summary_dict


# Collect and decompress S3 test stats summaries into JSON.
# data stored on S3 buckets are pathed by {sha}/{job} so we also allow
# optional jobs filter
def get_test_stats_summaries(
    *, sha: str, jobs: Optional[List[str]] = None
) -> Dict[str, List[Report]]:
    bucket = get_S3_bucket_readonly(OSSCI_METRICS_BUCKET)
    summaries = bucket.objects.filter(Prefix=f"test_time/{sha}")
    return _parse_master_summaries(summaries, jobs=list(jobs or []))


def get_test_stats_summaries_for_job(
    *, sha: str, job_prefix: str
) -> Dict[str, List[Report]]:
    bucket = get_S3_bucket_readonly(OSSCI_METRICS_BUCKET)
    summaries = bucket.objects.filter(Prefix=f"test_time/{sha}/{job_prefix}")
    return _parse_master_summaries(summaries, jobs=list())


def get_test_stats_summaries_for_pr(
    *, pr: str, job_prefix: str
) -> Dict[str, List[Tuple[Report, str]]]:
    bucket = get_S3_bucket_readonly(OSSCI_METRICS_BUCKET)
    summaries = bucket.objects.filter(Prefix=f"pr_test_time/{pr}/")
    return _parse_pr_summaries(summaries, job_prefix=job_prefix)


# This function returns a list of S3 test time reports. This function can run into errors if HAVE_BOTO3 = False
# or the S3 bucket is somehow unavailable. Even though this function goes through ten commits' reports to find a
# non-empty report, it is still conceivable (though highly unlikely) for this function to return no reports.
def get_previous_reports_for_branch(
    branch: str, ci_job_prefix: str = ""
) -> List[Report]:
    commit_date_ts = subprocess.check_output(
        ["git", "show", "-s", "--format=%ct", "HEAD"], encoding="ascii"
    ).strip()
    commit_date = datetime.fromtimestamp(int(commit_date_ts))
    # We go a day before this current commit to avoid pulling incomplete reports
    day_before_commit = str(commit_date - timedelta(days=1)).split(" ")[0]
    # something like git rev-list --before="2021-03-04" --max-count=10 --remotes="*origin/nightly"
    commits = subprocess.check_output(
        [
            "git",
            "rev-list",
            f"--before={day_before_commit}",
            "--max-count=10",
            f"--remotes=*{branch}",
        ],
        encoding="ascii",
    ).splitlines()

    reports: List[Report] = []
    commit_index = 0
    while len(reports) == 0 and commit_index < len(commits):
        commit = commits[commit_index]
        logger.info(f"Grabbing reports from commit: {commit}")
        summaries = get_test_stats_summaries_for_job(
            sha=commit, job_prefix=ci_job_prefix
        )
        for job_name, summary in summaries.items():
            reports.append(summary[0])
            if len(summary) > 1:
                logger.warning(
                    f"WARNING: Multiple summary objects found for {commit}/{job_name}"
                )
        commit_index += 1
    return reports
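For context, a minimal sketch of how these helpers composed, assuming boto3 is installed and the ossci-metrics bucket still allows anonymous reads; the commit SHA, job prefix, and test name below are hypothetical:

from tools.stats.s3_stat_parser import get_cases, get_test_stats_summaries_for_job

# Hedged sketch: fetch per-job report summaries for one commit, then pull a
# single test's entries out of each report (get_cases handles both the
# version 1 and version 2 report formats defined above).
summaries = get_test_stats_summaries_for_job(
    sha="0123456789abcdef0123456789abcdef01234567",  # hypothetical commit SHA
    job_prefix="linux-bionic-py3.7",  # hypothetical job prefix
)
for job_name, reports in summaries.items():
    for report in reports:
        for case in get_cases(
            data=report, filename=None, suite_name=None, test_name="test_foo"
        ):
            print(job_name, case["seconds"], case["status"])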
@@ -1,61 +0,0 @@
import base64
import bz2
import json
import os
from typing import Any


_lambda_client = None


def sprint(*args: Any) -> None:
    print("[scribe]", *args)


def aws_lambda() -> Any:
    global _lambda_client
    # lazy import so that we don't need to introduce extra dependencies
    import boto3  # type: ignore[import]

    if _lambda_client is None:
        _lambda_client = boto3.client("lambda")

    return _lambda_client


def invoke_lambda(name: str, payload: Any) -> Any:
    res = aws_lambda().invoke(FunctionName=name, Payload=json.dumps(payload).encode())
    payload = str(res["Payload"].read().decode())
    if res.get("FunctionError"):
        raise Exception(payload)
    return payload


def send_to_scribe(logs: str) -> str:
    access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN", "")

    # boto3 can be used when the runner has IAM roles setup
    # currently it's used as a fallback when SCRIBE_GRAPHQL_ACCESS_TOKEN is empty
    if access_token == "":
        return _send_to_scribe_via_boto3(logs)

    return _send_to_scribe_via_http(access_token, logs)


def _send_to_scribe_via_boto3(logs: str) -> str:
    sprint("Scribe access token not provided, sending report via boto3...")
    event = {"base64_bz2_logs": base64.b64encode(bz2.compress(logs.encode())).decode()}
    return str(invoke_lambda("gh-ci-scribe-proxy", event))


def _send_to_scribe_via_http(access_token: str, logs: str) -> str:
    # lazy import so that we don't need to introduce extra dependencies
    import requests  # type: ignore[import]

    sprint("Scribe access token provided, sending report via http...")
    r = requests.post(
        "https://graph.facebook.com/scribe_logs",
        data={"access_token": access_token, "logs": logs},
    )
    r.raise_for_status()
    return str(r.text)
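A minimal usage sketch for the helper above, assuming either SCRIBE_GRAPHQL_ACCESS_TOKEN is set or the runner has IAM access to the gh-ci-scribe-proxy lambda; the category name and message payload are purely illustrative, not the actual schema:

import json

# Hedged sketch: send_to_scribe takes a pre-serialized log payload as a
# string and returns the proxy's response body. The category and message
# below are made up for illustration.
logs = json.dumps(
    [
        {
            "category": "perfpipe_pytorch_test_times",  # hypothetical category
            "message": json.dumps({"job": "foo_job", "total_seconds": 12.3}),
            "line_escape": False,
        }
    ]
)
print(send_to_scribe(logs))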
tools/stats/test_history.py (deleted): 330 lines
@@ -1,330 +0,0 @@
#!/usr/bin/env python3

import argparse
import subprocess
import sys
from datetime import datetime, timezone
from signal import SIG_DFL, signal, SIGPIPE
from typing import Dict, Iterator, List, Optional, Set, Tuple

from tools.stats.s3_stat_parser import get_cases, get_test_stats_summaries, Report


def get_git_commit_history(*, path: str, ref: str) -> List[Tuple[str, datetime]]:
    rc = subprocess.check_output(
        ["git", "-C", path, "log", "--pretty=format:%H %ct", ref],
    ).decode("latin-1")
    return [
        (x[0], datetime.fromtimestamp(int(x[1]), tz=timezone.utc))
        for x in [line.split(" ") for line in rc.split("\n")]
    ]


def make_column(
    *,
    data: Optional[Report],
    filename: Optional[str],
    suite_name: Optional[str],
    test_name: str,
    digits: int,
) -> Tuple[str, int]:
    decimals = 3
    num_length = digits + 1 + decimals
    if data:
        cases = get_cases(
            data=data, filename=filename, suite_name=suite_name, test_name=test_name
        )
        if cases:
            case = cases[0]
            status = case["status"]
            omitted = len(cases) - 1
            if status:
                return f"{status.rjust(num_length)} ", omitted
            else:
                return f'{case["seconds"]:{num_length}.{decimals}f}s', omitted
        else:
            return f'{"absent".rjust(num_length)} ', 0
    else:
        return " " * (num_length + 1), 0


def make_columns(
    *,
    jobs: List[str],
    jsons: Dict[str, Report],
    omitted: Dict[str, int],
    filename: Optional[str],
    suite_name: Optional[str],
    test_name: str,
    digits: int,
) -> str:
    columns = []
    total_omitted = 0
    total_suites = 0
    for job in jobs:
        data = jsons.get(job)
        column, omitted_suites = make_column(
            data=data,
            filename=filename,
            suite_name=suite_name,
            test_name=test_name,
            digits=digits,
        )
        columns.append(column)
        total_suites += omitted_suites
        if job in omitted:
            total_omitted += omitted[job]
    if total_omitted > 0:
        columns.append(f"({total_omitted} job re-runs omitted)")
    if total_suites > 0:
        columns.append(f"({total_suites} matching suites omitted)")
    return " ".join(columns)


def make_lines(
    *,
    jobs: Set[str],
    jsons: Dict[str, List[Report]],
    filename: Optional[str],
    suite_name: Optional[str],
    test_name: str,
) -> List[str]:
    lines = []
    for job, reports in jsons.items():
        for data in reports:
            cases = get_cases(
                data=data,
                filename=filename,
                suite_name=suite_name,
                test_name=test_name,
            )
            if cases:
                case = cases[0]
                status = case["status"]
                line = f'{job} {case["seconds"]}s{f" {status}" if status else ""}'
                if len(cases) > 1:
                    line += f" ({len(cases) - 1} matching suites omitted)"
                lines.append(line)
            elif job in jobs:
                lines.append(f"{job} (test not found)")
    if lines:
        return lines
    else:
        return ["(no reports in S3)"]


def history_lines(
    *,
    commits: List[Tuple[str, datetime]],
    jobs: Optional[List[str]],
    filename: Optional[str],
    suite_name: Optional[str],
    test_name: str,
    delta: int,
    sha_length: int,
    mode: str,
    digits: int,
) -> Iterator[str]:
    prev_time = datetime.now(tz=timezone.utc)
    for sha, time in commits:
        if (prev_time - time).total_seconds() < delta * 3600:
            continue
        prev_time = time
        if jobs is None:
            summaries = get_test_stats_summaries(sha=sha)
        else:
            summaries = get_test_stats_summaries(sha=sha, jobs=jobs)
        if mode == "columns":
            assert jobs is not None
            # we assume that get_test_stats_summaries here doesn't
            # return empty lists
            omitted = {job: len(l) - 1 for job, l in summaries.items() if len(l) > 1}
            lines = [
                make_columns(
                    jobs=jobs,
                    jsons={job: l[0] for job, l in summaries.items()},
                    omitted=omitted,
                    filename=filename,
                    suite_name=suite_name,
                    test_name=test_name,
                    digits=digits,
                )
            ]
        else:
            assert mode == "multiline"
            lines = make_lines(
                jobs=set(jobs or []),
                jsons=summaries,
                filename=filename,
                suite_name=suite_name,
                test_name=test_name,
            )
        for line in lines:
            yield f"{time:%Y-%m-%d %H:%M:%S}Z {sha[:sha_length]} {line}".rstrip()


class HelpFormatter(
    argparse.ArgumentDefaultsHelpFormatter,
    argparse.RawDescriptionHelpFormatter,
):
    pass


def description() -> str:
    return r"""
Display the history of a test.

Each line of (non-error) output starts with the timestamp and SHA1 hash
of the commit it refers to, in this format:

    YYYY-MM-DD hh:mm:ss 0123456789abcdef0123456789abcdef01234567

In multiline mode, each line next includes the name of a CircleCI job,
followed by the time of the specified test in that job at that commit.
Example:

    $ tools/stats/test_history.py --mode=multiline --ref=86a961af879 --sha-length=8 \
      --test=test_composite_compliance_dot_cpu_float32 \
      --job linux-xenial-py3.7-gcc5.4-test-default1 --job linux-xenial-py3.7-gcc7-test-default1
    2022-02-18 15:47:37Z 86a961af linux-xenial-py3.7-gcc5.4-test-default1 0.001s
    2022-02-18 15:47:37Z 86a961af linux-xenial-py3.7-gcc7-test-default1 0.001s
    2022-02-18 15:12:34Z f5e201e4 linux-xenial-py3.7-gcc5.4-test-default1 0.001s
    2022-02-18 15:12:34Z f5e201e4 linux-xenial-py3.7-gcc7-test-default1 0.001s
    2022-02-18 13:14:56Z 1c0df265 linux-xenial-py3.7-gcc5.4-test-default1 0.001s
    2022-02-18 13:14:56Z 1c0df265 linux-xenial-py3.7-gcc7-test-default1 0.001s
    2022-02-18 13:14:56Z e73eaffd (no reports in S3)
    2022-02-18 06:29:12Z 710f12f5 linux-xenial-py3.7-gcc5.4-test-default1 0.001s

Another multiline example, this time with the --all flag:

    $ tools/stats/test_history.py --mode=multiline --all --ref=86a961af879 --delta=12 --sha-length=8 \
      --test=test_composite_compliance_dot_cuda_float32
    2022-02-18 03:49:46Z 69389fb5 linux-bionic-cuda10.2-py3.9-gcc7-test-default1 0.001s skipped
    2022-02-18 03:49:46Z 69389fb5 linux-bionic-cuda10.2-py3.9-gcc7-test-slow1 0.001s skipped
    2022-02-18 03:49:46Z 69389fb5 linux-xenial-cuda11.3-py3.7-gcc7-test-default1 0.001s skipped
    2022-02-18 03:49:46Z 69389fb5 periodic-linux-bionic-cuda11.5-py3.7-gcc7-test-default1 0.001s skipped
    2022-02-18 03:49:46Z 69389fb5 periodic-linux-xenial-cuda10.2-py3-gcc7-slow-gradcheck-test-default1 0.001s skipped
    2022-02-18 03:49:46Z 69389fb5 periodic-linux-xenial-cuda11.1-py3.7-gcc7-debug-test-default1 0.001s skipped

In columns mode, the name of the job isn't printed, but the order of the
columns is guaranteed to match the order of the jobs passed on the
command line. Example:

    $ tools/stats/test_history.py --mode=columns --ref=86a961af879 --sha-length=8 \
      --test=test_composite_compliance_dot_cpu_float32 \
      --job linux-xenial-py3.7-gcc5.4-test-default1 --job linux-xenial-py3.7-gcc7-test-default1
    2022-02-18 15:47:37Z 86a961af 0.001s 0.001s
    2022-02-18 15:12:34Z f5e201e4 0.001s 0.001s
    2022-02-18 13:14:56Z 1c0df265 0.001s 0.001s
    2022-02-18 13:14:56Z e73eaffd
    2022-02-18 06:29:12Z 710f12f5 0.001s 0.001s
    2022-02-18 05:20:30Z 51b04f27 0.001s 0.001s
    2022-02-18 03:49:46Z 69389fb5 0.001s 0.001s
    2022-02-18 00:19:12Z 056b6260 0.001s 0.001s
    2022-02-17 23:58:32Z 39fb7714 0.001s 0.001s

Minor note: in columns mode, a blank cell means that no report was found
in S3, while the word "absent" means that a report was found but the
indicated test was not found in that report.
"""


def parse_args(raw: List[str]) -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        __file__,
        description=description(),
        formatter_class=HelpFormatter,
    )
    parser.add_argument(
        "--mode",
        choices=["columns", "multiline"],
        help="output format",
        default="columns",
    )
    parser.add_argument(
        "--pytorch",
        help="path to local PyTorch clone",
        default=".",
    )
    parser.add_argument(
        "--ref",
        help="starting point (most recent Git ref) to display history for",
        default="master",
    )
    parser.add_argument(
        "--delta",
        type=int,
        help="minimum number of hours between commits",
        default=0,
    )
    parser.add_argument(
        "--sha-length",
        type=int,
        help="length of the prefix of the SHA1 hash to show",
        default=40,
    )
    parser.add_argument(
        "--digits",
        type=int,
        help="(columns) number of digits to display before the decimal point",
        default=4,
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="(multiline) ignore listed jobs, show all jobs for each commit",
    )
    parser.add_argument(
        "--file",
        help="name of the file containing the test",
    )
    parser.add_argument(
        "--suite",
        help="name of the suite containing the test",
    )
    parser.add_argument("--test", help="name of the test", required=True)
    parser.add_argument(
        "--job",
        help="names of jobs to display columns for, in order",
        action="append",
        default=[],
    )
    args = parser.parse_args(raw)

    args.jobs = None if args.all else args.job
    # We don't allow implicit or empty "--jobs", unless "--all" is specified.
    if args.jobs == []:
        parser.error("No jobs specified.")

    return args


def run(raw: List[str]) -> Iterator[str]:
    args = parse_args(raw)

    commits = get_git_commit_history(path=args.pytorch, ref=args.ref)

    return history_lines(
        commits=commits,
        jobs=args.jobs,
        filename=args.file,
        suite_name=args.suite,
        test_name=args.test,
        delta=args.delta,
        mode=args.mode,
        sha_length=args.sha_length,
        digits=args.digits,
    )


def main() -> None:
    for line in run(sys.argv[1:]):
        print(line, flush=True)


if __name__ == "__main__":
    signal(SIGPIPE, SIG_DFL)  # https://stackoverflow.com/a/30091579
    try:
        main()
    except KeyboardInterrupt:
        pass
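The same module could also be driven programmatically rather than through the CLI; a minimal sketch, reusing the flags from the docstring examples above:

from tools.stats import test_history

# Hedged sketch: run() parses the same arguments as the CLI and yields output
# lines lazily, so the history can be consumed incrementally.
for line in test_history.run(
    [
        "--mode=columns",
        "--ref=86a961af879",
        "--sha-length=8",
        "--test=test_composite_compliance_dot_cpu_float32",
        "--job",
        "linux-xenial-py3.7-gcc5.4-test-default1",
    ]
):
    print(line, flush=True)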
@@ -1,683 +0,0 @@
# -*- coding: utf-8 -*-
import unittest
from typing import Dict, List

from tools.stats import print_test_stats
from tools.stats.s3_stat_parser import (
    Commit,
    Report,
    ReportMetaMeta,
    Status,
    Version1Case,
    Version1Report,
    Version2Case,
    Version2Report,
)


def fakehash(char: str) -> str:
    return char * 40


def dummy_meta_meta() -> ReportMetaMeta:
    return {
        "build_pr": "",
        "build_tag": "",
        "build_sha1": "",
        "build_base_commit": "",
        "build_branch": "",
        "build_job": "",
        "build_workflow_id": "",
        "build_start_time_epoch": "",
    }


def makecase(
    name: str,
    seconds: float,
    *,
    errored: bool = False,
    failed: bool = False,
    skipped: bool = False,
) -> Version1Case:
    return {
        "name": name,
        "seconds": seconds,
        "errored": errored,
        "failed": failed,
        "skipped": skipped,
    }


def make_report_v1(tests: Dict[str, List[Version1Case]]) -> Version1Report:
    suites = {
        suite_name: {
            "total_seconds": sum(case["seconds"] for case in cases),
            "cases": cases,
        }
        for suite_name, cases in tests.items()
    }
    return {
        **dummy_meta_meta(),  # type: ignore[misc]
        "total_seconds": sum(s["total_seconds"] for s in suites.values()),
        "suites": suites,
    }


def make_case_v2(seconds: float, status: Status = None) -> Version2Case:
    return {
        "seconds": seconds,
        "status": status,
    }


def make_report_v2(
    tests: Dict[str, Dict[str, Dict[str, Version2Case]]]
) -> Version2Report:
    files = {}
    for file_name, file_suites in tests.items():
        suites = {
            suite_name: {
                "total_seconds": sum(case["seconds"] for case in cases.values()),
                "cases": cases,
            }
            for suite_name, cases in file_suites.items()
        }
        files[file_name] = {
            "suites": suites,
            "total_seconds": sum(suite["total_seconds"] for suite in suites.values()),  # type: ignore[type-var]
        }
    return {
        **dummy_meta_meta(),  # type: ignore[misc]
        "format_version": 2,
        "total_seconds": sum(s["total_seconds"] for s in files.values()),
        "files": files,
    }


maxDiff = None


class TestPrintTestStats(unittest.TestCase):
    version1_report: Version1Report = make_report_v1(
        {
            # input ordering of the suites is ignored
            "Grault": [
                # not printed: status same and time similar
                makecase("test_grault0", 4.78, failed=True),
                # status same, but time increased a lot
                makecase("test_grault2", 1.473, errored=True),
            ],
            # individual tests times changed, not overall suite
            "Qux": [
                # input ordering of the test cases is ignored
                makecase("test_qux1", 0.001, skipped=True),
                makecase("test_qux6", 0.002, skipped=True),
                # time in bounds, but status changed
                makecase("test_qux4", 7.158, failed=True),
                # not printed because it's the same as before
                makecase("test_qux7", 0.003, skipped=True),
                makecase("test_qux5", 11.968),
                makecase("test_qux3", 23.496),
            ],
            # new test suite
            "Bar": [
                makecase("test_bar2", 3.742, failed=True),
                makecase("test_bar1", 50.447),
            ],
            # overall suite time changed but no individual tests
            "Norf": [
                makecase("test_norf1", 3),
                makecase("test_norf2", 3),
                makecase("test_norf3", 3),
                makecase("test_norf4", 3),
            ],
            # suite doesn't show up if it doesn't change enough
            "Foo": [
                makecase("test_foo1", 42),
                makecase("test_foo2", 56),
            ],
        }
    )

    version2_report: Version2Report = make_report_v2(
        {
            "test_a": {
                "Grault": {
                    "test_grault0": make_case_v2(4.78, "failed"),
                    "test_grault2": make_case_v2(1.473, "errored"),
                },
                "Qux": {
                    "test_qux1": make_case_v2(0.001, "skipped"),
                    "test_qux6": make_case_v2(0.002, "skipped"),
                    "test_qux4": make_case_v2(7.158, "failed"),
                    "test_qux7": make_case_v2(0.003, "skipped"),
                    "test_qux8": make_case_v2(11.968),
                    "test_qux3": make_case_v2(23.496),
                },
            },
            "test_b": {
                "Bar": {
                    "test_bar2": make_case_v2(3.742, "failed"),
                    "test_bar1": make_case_v2(50.447),
                },
                # overall suite time changed but no individual tests
                "Norf": {
                    "test_norf1": make_case_v2(3),
                    "test_norf2": make_case_v2(3),
                    "test_norf3": make_case_v2(3),
                    "test_norf4": make_case_v2(3),
                },
            },
            "test_c": {
                "Foo": {
                    "test_foo1": make_case_v2(42),
                    "test_foo2": make_case_v2(56),
                },
            },
        }
    )

    def test_simplify(self) -> None:
        self.assertEqual(
            {
                "": {
                    "Bar": {
                        "test_bar1": {"seconds": 50.447, "status": None},
                        "test_bar2": {"seconds": 3.742, "status": "failed"},
                    },
                    "Foo": {
                        "test_foo1": {"seconds": 42, "status": None},
                        "test_foo2": {"seconds": 56, "status": None},
                    },
                    "Grault": {
                        "test_grault0": {"seconds": 4.78, "status": "failed"},
                        "test_grault2": {"seconds": 1.473, "status": "errored"},
                    },
                    "Norf": {
                        "test_norf1": {"seconds": 3, "status": None},
                        "test_norf3": {"seconds": 3, "status": None},
                        "test_norf2": {"seconds": 3, "status": None},
                        "test_norf4": {"seconds": 3, "status": None},
                    },
                    "Qux": {
                        "test_qux1": {"seconds": 0.001, "status": "skipped"},
                        "test_qux3": {"seconds": 23.496, "status": None},
                        "test_qux4": {"seconds": 7.158, "status": "failed"},
                        "test_qux5": {"seconds": 11.968, "status": None},
                        "test_qux6": {"seconds": 0.002, "status": "skipped"},
                        "test_qux7": {"seconds": 0.003, "status": "skipped"},
                    },
                },
            },
            print_test_stats.simplify(self.version1_report),
        )

        self.assertEqual(
            {
                "test_a": {
                    "Grault": {
                        "test_grault0": {"seconds": 4.78, "status": "failed"},
                        "test_grault2": {"seconds": 1.473, "status": "errored"},
                    },
                    "Qux": {
                        "test_qux1": {"seconds": 0.001, "status": "skipped"},
                        "test_qux3": {"seconds": 23.496, "status": None},
                        "test_qux4": {"seconds": 7.158, "status": "failed"},
                        "test_qux6": {"seconds": 0.002, "status": "skipped"},
                        "test_qux7": {"seconds": 0.003, "status": "skipped"},
                        "test_qux8": {"seconds": 11.968, "status": None},
                    },
                },
                "test_b": {
                    "Bar": {
                        "test_bar1": {"seconds": 50.447, "status": None},
                        "test_bar2": {"seconds": 3.742, "status": "failed"},
                    },
                    "Norf": {
                        "test_norf1": {"seconds": 3, "status": None},
                        "test_norf2": {"seconds": 3, "status": None},
                        "test_norf3": {"seconds": 3, "status": None},
                        "test_norf4": {"seconds": 3, "status": None},
                    },
                },
                "test_c": {
                    "Foo": {
                        "test_foo1": {"seconds": 42, "status": None},
                        "test_foo2": {"seconds": 56, "status": None},
                    },
                },
            },
            print_test_stats.simplify(self.version2_report),
        )

    def test_analysis(self) -> None:
        head_report = self.version1_report

        base_reports: Dict[Commit, List[Report]] = {
            # bbbb has no reports, so base is cccc instead
            fakehash("b"): [],
            fakehash("c"): [
                make_report_v1(
                    {
                        "Baz": [
                            makecase("test_baz2", 13.605),
                            # no recent suites have & skip this test
                            makecase("test_baz1", 0.004, skipped=True),
                        ],
                        "Foo": [
                            makecase("test_foo1", 43),
                            # test added since dddd
                            makecase("test_foo2", 57),
                        ],
                        "Grault": [
                            makecase("test_grault0", 4.88, failed=True),
                            makecase("test_grault1", 11.967, failed=True),
                            makecase("test_grault2", 0.395, errored=True),
                            makecase("test_grault3", 30.460),
                        ],
                        "Norf": [
                            makecase("test_norf1", 2),
                            makecase("test_norf2", 2),
                            makecase("test_norf3", 2),
                            makecase("test_norf4", 2),
                        ],
                        "Qux": [
                            makecase("test_qux3", 4.978, errored=True),
                            makecase("test_qux7", 0.002, skipped=True),
                            makecase("test_qux2", 5.618),
                            makecase("test_qux4", 7.766, errored=True),
                            makecase("test_qux6", 23.589, failed=True),
                        ],
                    }
                ),
            ],
            fakehash("d"): [
                make_report_v1(
                    {
                        "Foo": [
                            makecase("test_foo1", 40),
                            # removed in cccc
                            makecase("test_foo3", 17),
                        ],
                        "Baz": [
                            # not skipped, so not included in stdev
                            makecase("test_baz1", 3.14),
                        ],
                        "Qux": [
                            makecase("test_qux7", 0.004, skipped=True),
                            makecase("test_qux2", 6.02),
                            makecase("test_qux4", 20.932),
                        ],
                        "Norf": [
                            makecase("test_norf1", 3),
                            makecase("test_norf2", 3),
                            makecase("test_norf3", 3),
                            makecase("test_norf4", 3),
                        ],
                        "Grault": [
                            makecase("test_grault0", 5, failed=True),
                            makecase("test_grault1", 14.325, failed=True),
                            makecase("test_grault2", 0.31, errored=True),
                        ],
                    }
                ),
            ],
            fakehash("e"): [],
            fakehash("f"): [
                make_report_v1(
                    {
                        "Foo": [
                            makecase("test_foo3", 24),
                            makecase("test_foo1", 43),
                        ],
                        "Baz": [
                            makecase("test_baz2", 16.857),
                        ],
                        "Qux": [
                            makecase("test_qux2", 6.422),
                            makecase("test_qux4", 6.382, errored=True),
                        ],
                        "Norf": [
                            makecase("test_norf1", 0.9),
                            makecase("test_norf3", 0.9),
                            makecase("test_norf2", 0.9),
                            makecase("test_norf4", 0.9),
                        ],
                        "Grault": [
                            makecase("test_grault0", 4.7, failed=True),
                            makecase("test_grault1", 13.146, failed=True),
                            makecase("test_grault2", 0.48, errored=True),
                        ],
                    }
                ),
            ],
        }

        simpler_head = print_test_stats.simplify(head_report)
        simpler_base = {}
        for commit, reports in base_reports.items():
            simpler_base[commit] = [print_test_stats.simplify(r) for r in reports]
        analysis = print_test_stats.analyze(
            head_report=simpler_head,
            base_reports=simpler_base,
        )

        self.assertEqual(
            """\

- class Baz:
- # was 15.23s ± 2.30s
-
- def test_baz1: ...
- # was 0.004s (skipped)
-
- def test_baz2: ...
- # was 15.231s ± 2.300s


class Grault:
# was 48.86s ± 1.19s
# now 6.25s

- def test_grault1: ...
- # was 13.146s ± 1.179s (failed)

- def test_grault3: ...
- # was 30.460s


class Qux:
# was 41.66s ± 1.06s
# now 42.63s

- def test_qux2: ...
- # was 6.020s ± 0.402s

! def test_qux3: ...
! # was 4.978s (errored)
! # now 23.496s

! def test_qux4: ...
! # was 7.074s ± 0.979s (errored)
! # now 7.158s (failed)

! def test_qux6: ...
! # was 23.589s (failed)
! # now 0.002s (skipped)

+ def test_qux1: ...
+ # now 0.001s (skipped)

+ def test_qux5: ...
+ # now 11.968s


+ class Bar:
+ # now 54.19s
+
+ def test_bar1: ...
+ # now 50.447s
+
+ def test_bar2: ...
+ # now 3.742s (failed)

""",
            print_test_stats.anomalies(analysis),
        )

    def test_graph(self) -> None:
        # HEAD is on master
        self.assertEqual(
            """\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
* aaaaaaaaaa (HEAD) total time 502.99s
* bbbbbbbbbb (base) 1 report, total time 47.84s
* cccccccccc 1 report, total time 332.50s
* dddddddddd 0 reports
|
:
""",
            print_test_stats.graph(
                head_sha=fakehash("a"),
                head_seconds=502.99,
                base_seconds={
                    fakehash("b"): [47.84],
                    fakehash("c"): [332.50],
                    fakehash("d"): [],
                },
                on_master=True,
            ),
        )

        self.assertEqual(
            """\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 9988.77s
|/
* bbbbbbbbbb (base) 121 reports, total time 7654.32s ± 55.55s
* cccccccccc 20 reports, total time 5555.55s ± 253.19s
* dddddddddd 1 report, total time 1234.56s
|
:
""",
            print_test_stats.graph(
                head_sha=fakehash("a"),
                head_seconds=9988.77,
                base_seconds={
                    fakehash("b"): [7598.77] * 60 + [7654.32] + [7709.87] * 60,
                    fakehash("c"): [5308.77] * 10 + [5802.33] * 10,
                    fakehash("d"): [1234.56],
                },
                on_master=False,
            ),
        )

        self.assertEqual(
            """\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 25.52s
| |
| : (5 commits)
|/
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
* dddddddddd (base) 15 reports, total time 58.92s ± 25.82s
|
:
""",
            print_test_stats.graph(
                head_sha=fakehash("a"),
                head_seconds=25.52,
                base_seconds={
                    fakehash("b"): [],
                    fakehash("c"): [],
                    fakehash("d"): [52.25] * 14 + [152.26],
                },
                on_master=False,
                ancestry_path=5,
            ),
        )

        self.assertEqual(
            """\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 0.08s
|/|
| : (1 commit)
|
* bbbbbbbbbb 0 reports
* cccccccccc (base) 1 report, total time 0.09s
* dddddddddd 3 reports, total time 0.10s ± 0.05s
|
:
""",
            print_test_stats.graph(
                head_sha=fakehash("a"),
                head_seconds=0.08,
                base_seconds={
                    fakehash("b"): [],
                    fakehash("c"): [0.09],
                    fakehash("d"): [0.05, 0.10, 0.15],
                },
                on_master=False,
                other_ancestors=1,
            ),
        )

        self.assertEqual(
            """\
Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 5.98s
| |
| : (1 commit)
|/|
| : (7 commits)
|
* bbbbbbbbbb (base) 2 reports, total time 6.02s ± 1.71s
* cccccccccc 0 reports
* dddddddddd 10 reports, total time 5.84s ± 0.92s
|
:
""",
            print_test_stats.graph(
                head_sha=fakehash("a"),
                head_seconds=5.98,
                base_seconds={
                    fakehash("b"): [4.81, 7.23],
                    fakehash("c"): [],
                    fakehash("d"): [4.97] * 5 + [6.71] * 5,
                },
                on_master=False,
                ancestry_path=1,
                other_ancestors=7,
            ),
        )

    def test_regression_info(self) -> None:
        self.assertEqual(
            """\
----- Historic stats comparison result ------

job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
|/
* bbbbbbbbbb (base) 1 report, total time 41.00s
* cccccccccc 1 report, total time 43.00s
|
:

Removed (across 1 suite) 1 test, totaling - 1.00s
Modified (across 1 suite) 1 test, totaling - 41.48s ± 2.12s
Added (across 1 suite) 1 test, totaling + 3.00s
""",
            print_test_stats.regression_info(
                head_sha=fakehash("a"),
                head_report=make_report_v1(
                    {
                        "Foo": [
                            makecase("test_foo", 0.02, skipped=True),
                            makecase("test_baz", 3),
                        ]
                    }
                ),
                base_reports={
                    fakehash("b"): [
                        make_report_v1(
                            {
                                "Foo": [
                                    makecase("test_foo", 40),
                                    makecase("test_bar", 1),
                                ],
                            }
                        ),
                    ],
                    fakehash("c"): [
                        make_report_v1(
                            {
                                "Foo": [
                                    makecase("test_foo", 43),
                                ],
                            }
                        ),
                    ],
                },
                job_name="foo_job",
                on_master=False,
                ancestry_path=0,
                other_ancestors=0,
            ),
        )

    def test_regression_info_new_job(self) -> None:
        self.assertEqual(
            """\
----- Historic stats comparison result ------

job: foo_job
commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

Commit graph (base is most recent master ancestor with at least one S3 report):

: (master)
|
| * aaaaaaaaaa (HEAD) total time 3.02s
| |
| : (3 commits)
|/|
| : (2 commits)
|
* bbbbbbbbbb 0 reports
* cccccccccc 0 reports
|
:

Removed (across 0 suites) 0 tests, totaling 0.00s
Modified (across 0 suites) 0 tests, totaling 0.00s
Added (across 1 suite) 2 tests, totaling + 3.02s
""",
            print_test_stats.regression_info(
                head_sha=fakehash("a"),
                head_report=make_report_v1(
                    {
                        "Foo": [
                            makecase("test_foo", 0.02, skipped=True),
                            makecase("test_baz", 3),
                        ]
                    }
                ),
                base_reports={
                    fakehash("b"): [],
                    fakehash("c"): [],
                },
                job_name="foo_job",
                on_master=False,
                ancestry_path=3,
                other_ancestors=2,
            ),
        )


if __name__ == "__main__":
    unittest.main()
@@ -1,74 +0,0 @@
import itertools
import re
import shlex
import unittest
from typing import List, Optional

from tools.stats import test_history
from typing_extensions import TypedDict


class Example(TypedDict):
    cmd: str
    args: List[str]
    lines: List[str]


def parse_block(block: List[str]) -> Optional[Example]:
    if block:
        match = re.match(r"^\$ ([^ ]+) (.*)$", block[0])
        if match:
            cmd, first = match.groups()
            args = []
            for i, line in enumerate([first] + block[1:]):
                if line.endswith("\\"):
                    args.append(line[:-1])
                else:
                    args.append(line)
                    break
            return {
                "cmd": cmd,
                "args": shlex.split("".join(args)),
                "lines": block[i + 1 :],
            }
    return None


def parse_description(description: str) -> List[Example]:
    examples: List[Example] = []
    for block in description.split("\n\n"):
        matches = [re.match(r"^    (.*)$", line) for line in block.splitlines()]
        if all(matches):
            lines = []
            for match in matches:
                assert match
                (line,) = match.groups()
                lines.append(line)
            example = parse_block(lines)
            if example:
                examples.append(example)
    return examples


@unittest.skip("Skipping as this test is fragile, issue #73083")
class TestTestHistory(unittest.TestCase):
    maxDiff = None

    def test_help_examples(self) -> None:
        examples = parse_description(test_history.description())
        self.assertEqual(len(examples), 3)
        for i, example in enumerate(examples):
            with self.subTest(i=i):
                self.assertTrue(test_history.__file__.endswith(example["cmd"]))
                expected = example["lines"]
                actual = list(
                    itertools.islice(
                        test_history.run(example["args"]),
                        len(expected),
                    )
                )
                self.assertEqual(actual, expected)


if __name__ == "__main__":
    unittest.main()