Add script to display history for a single test across multiple jobs over time (#52000)

Summary:
Adapted from this gist: https://gist.github.com/malfet/1c34f261a28ae7af61210174394eaece

Pull Request resolved: https://github.com/pytorch/pytorch/pull/52000

Test Plan: Example shell session here: https://pastebin.com/HYgWZBFB

Reviewed By: walterddr

Differential Revision: D26372191

Pulled By: samestep

fbshipit-source-id: cdc9a27e1b4a0b3123a70e693b17d524e7c6cb95
Author: Sam Estep
Date: 2021-02-11 13:11:32 -08:00
Committed by: Facebook GitHub Bot
Parent: 8908874003
Commit: deb74edb28
3 changed files with 242 additions and 0 deletions

mypy-strict.ini

@@ -37,6 +37,7 @@ strict_equality = True
files = tools/codegen/gen.py,
tools/autograd/*.py,
tools/pyi/*.py,
tools/test_history.py,
torch/testing/_internal/mypy_wrapper.py,
torch/testing/_internal/print_test_stats.py,
torch/utils/benchmark/utils/common.py,

tools/README.md

@@ -45,6 +45,8 @@ Developer tools which you might find useful:
can conveniently run diffs on them when working on code-generation.
(See also [generated_dirs.txt](generated_dirs.txt) which
specifies the list of directories with generated files.)
* [test_history.py](test_history.py) - Query S3 to display the history of a
  single test across multiple jobs over time; see the example invocation below.
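  For example, to print one timing column per job for a single test (the
  suite, test, and job names here are hypothetical placeholders, not real
  CI job names):

  ```sh
  python tools/test_history.py columns TestNN test_relu \
      linux_ci_job windows_ci_job
  ```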
Important if you want to run on AMD GPU:

tools/test_history.py (new executable file, 239 lines)

@@ -0,0 +1,239 @@
#!/usr/bin/env python3
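"""Display the history of a single test across multiple jobs over time.

Test times come from per-commit reports in the ossci-metrics S3 bucket.
"""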
import argparse
import bz2
import json
import subprocess
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import boto3  # type: ignore[import]


def get_git_commit_history(
*,
path: str,
ref: str
) -> List[Tuple[str, datetime]]:
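    # Each `git log --pretty=format:'%H %ct'` line is "<sha> <unix timestamp>".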
rc = subprocess.check_output(
['git', '-C', path, 'log', '--pretty=format:%H %ct', ref],
).decode("latin-1")
return [
(x[0], datetime.fromtimestamp(int(x[1])))
for x in [line.split(" ") for line in rc.split("\n")]
    ]


def get_ossci_jsons(
*,
bucket: Any,
sha: str,
jobs: Optional[List[str]]
) -> Dict[str, Any]:
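    # Reports are stored as bz2-compressed JSON under test_time/<sha>/<job>/.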
prefix = f"test_time/{sha}/"
objs: List[Any]
if jobs is None:
objs = list(bucket.objects.filter(Prefix=prefix))
else:
objs = []
for job in jobs:
objs.extend(list(bucket.objects.filter(Prefix=f"{prefix}{job}/")))
# initial pass to avoid downloading more than necessary
# in the case where there are multiple reports for a single sha+job
uniqueified = {obj.key.split('/')[2]: obj for obj in objs}
return {
job: json.loads(bz2.decompress(obj.get()['Body'].read()))
for job, obj in uniqueified.items()
    }


def get_case(
*,
data: Any,
suite_name: str,
test_name: str,
) -> Optional[Dict[str, Any]]:
suite = data.get('suites', {}).get(suite_name)
if suite:
testcase_times = {
case['name']: case
for case in suite['cases']
}
return testcase_times.get(test_name)
    return None


def case_status(case: Dict[str, Any]) -> Optional[str]:
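    # Return the first non-success flag set on this case; None means it passed.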
    for k in ('errored', 'failed', 'skipped'):
if case[k]:
return k
    return None


def make_column(
*,
data: Any,
suite_name: str,
test_name: str,
digits: int,
) -> str:
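    # Fixed column width: `digits` integer digits, a decimal point, 3 decimals,
    # plus one char for the trailing 's' unit (or a right-justified status word).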
decimals = 3
num_length = digits + 1 + decimals
case = get_case(data=data, suite_name=suite_name, test_name=test_name)
if case:
status = case_status(case)
if status:
return f'{status.rjust(num_length)} '
else:
return f'{case["seconds"]:{num_length}.{decimals}f}s'
    return ' ' * (num_length + 1)


def make_columns(
*,
bucket: Any,
sha: str,
jobs: List[str],
suite_name: str,
test_name: str,
digits: int,
) -> str:
jsons = get_ossci_jsons(bucket=bucket, sha=sha, jobs=jobs)
return ' '.join(
make_column(
data=jsons.get(job, {}),
suite_name=suite_name,
test_name=test_name,
digits=digits,
)
for job in jobs
    )


def make_lines(
*,
bucket: Any,
sha: str,
jobs: Optional[List[str]],
suite_name: str,
test_name: str,
) -> List[str]:
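    # One line per job that ran this test at this commit: "<job> <seconds> [status]".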
jsons = get_ossci_jsons(bucket=bucket, sha=sha, jobs=jobs)
lines = []
for job, data in jsons.items():
case = get_case(data=data, suite_name=suite_name, test_name=test_name)
if case:
status = case_status(case)
lines.append(f'{job} {case["seconds"]} {status or ""}')
    return lines


def display_history(
*,
bucket: Any,
commits: List[Tuple[str, datetime]],
jobs: Optional[List[str]],
suite_name: str,
test_name: str,
delta: int,
mode: str,
digits: int,
) -> None:
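    # Commits arrive newest-first; skip any commit less than `delta` hours
    # after the previously printed one.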
prev_time = datetime.now()
for sha, time in commits:
if (prev_time - time).total_seconds() < delta * 3600:
continue
prev_time = time
lines: List[str]
if mode == 'columns':
assert jobs is not None
lines = [make_columns(
bucket=bucket,
sha=sha,
jobs=jobs,
suite_name=suite_name,
test_name=test_name,
digits=digits,
)]
else:
assert mode == 'multiline'
lines = make_lines(
bucket=bucket,
sha=sha,
jobs=jobs,
suite_name=suite_name,
test_name=test_name,
)
for line in lines:
print(f"{time} {sha} {line}".rstrip())
if __name__ == "__main__":
parser = argparse.ArgumentParser(
__file__,
description='Display the history of a test.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
'mode',
choices=['columns', 'multiline'],
help='output format',
)
parser.add_argument(
'--pytorch',
help='path to local PyTorch clone',
default='.',
)
parser.add_argument(
'--ref',
help='starting point (most recent Git ref) to display history for',
default='master',
)
parser.add_argument(
'--delta',
type=int,
help='minimum number of hours between rows',
default=12,
)
parser.add_argument(
'--digits',
type=int,
help='(columns) number of digits to display before the decimal point',
default=4,
)
parser.add_argument(
'--all',
action='store_true',
help='(multiline) ignore listed jobs, show all jobs for each commit',
)
parser.add_argument(
'suite',
help='name of the suite containing the test',
)
parser.add_argument(
'test',
help='name of the test',
)
parser.add_argument(
'job',
nargs='*',
help='names of jobs to display columns for, in order',
default=[],
)
args = parser.parse_args()
commits = get_git_commit_history(path=args.pytorch, ref=args.ref)
s3 = boto3.resource("s3")
bucket = s3.Bucket('ossci-metrics')
display_history(
bucket=bucket,
commits=commits,
jobs=None if args.all else args.job,
suite_name=args.suite,
test_name=args.test,
delta=args.delta,
mode=args.mode,
digits=args.digits,
)