Add script to display history for a single test across multiple jobs over time (#52000)

Summary:
Adapted from this gist: https://gist.github.com/malfet/1c34f261a28ae7af61210174394eaece

Pull Request resolved: https://github.com/pytorch/pytorch/pull/52000

Test Plan: Example shell session here: https://pastebin.com/HYgWZBFB

Reviewed By: walterddr

Differential Revision: D26372191

Pulled By: samestep

fbshipit-source-id: cdc9a27e1b4a0b3123a70e693b17d524e7c6cb95
Author: Sam Estep
Date: 2021-02-11 13:11:32 -08:00
Committed by: Facebook GitHub Bot
Parent: 8908874003
Commit: deb74edb28
3 changed files with 242 additions and 0 deletions

mypy-strict.ini

@@ -37,6 +37,7 @@ strict_equality = True
files = tools/codegen/gen.py,
tools/autograd/*.py,
tools/pyi/*.py,
tools/test_history.py,
torch/testing/_internal/mypy_wrapper.py,
torch/testing/_internal/print_test_stats.py,
torch/utils/benchmark/utils/common.py,

tools/README.md

@@ -45,6 +45,8 @@ Developer tools which you might find useful:
can conveniently run diffs on them when working on code-generation.
(See also [generated_dirs.txt](generated_dirs.txt) which
specifies the list of directories with generated files.)
* [test_history.py](test_history.py) - Query S3 to display the history of a
  single test across multiple jobs over time; see the example invocation below.
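  For example, to print one timing column per job for a single test (the
  suite, test, and job names here are hypothetical placeholders, not real
  CI job names):

  ```sh
  python tools/test_history.py columns TestNN test_relu \
      linux_ci_job windows_ci_job
  ```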
Important if you want to run on AMD GPU:

tools/test_history.py (new executable file, 239 lines)

@@ -0,0 +1,239 @@
#!/usr/bin/env python3
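"""Display the history of a single test across multiple jobs over time.

Test times come from per-commit reports in the ossci-metrics S3 bucket.
"""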
import argparse
import bz2
import json
import subprocess
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import boto3  # type: ignore[import]


def get_git_commit_history(
*,
path: str,
ref: str
) -> List[Tuple[str, datetime]]:
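    # Each `git log --pretty=format:'%H %ct'` line is "<sha> <unix timestamp>".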
rc = subprocess.check_output(
['git', '-C', path, 'log', '--pretty=format:%H %ct', ref],
).decode("latin-1")
return [
(x[0], datetime.fromtimestamp(int(x[1])))
for x in [line.split(" ") for line in rc.split("\n")]
    ]


def get_ossci_jsons(
*,
bucket: Any,
sha: str,
jobs: Optional[List[str]]
) -> Dict[str, Any]:
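    # Reports are stored as bz2-compressed JSON under test_time/<sha>/<job>/.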
prefix = f"test_time/{sha}/"
objs: List[Any]
if jobs is None:
objs = list(bucket.objects.filter(Prefix=prefix))
else:
objs = []
for job in jobs:
objs.extend(list(bucket.objects.filter(Prefix=f"{prefix}{job}/")))
# initial pass to avoid downloading more than necessary
# in the case where there are multiple reports for a single sha+job
uniqueified = {obj.key.split('/')[2]: obj for obj in objs}
return {
job: json.loads(bz2.decompress(obj.get()['Body'].read()))
for job, obj in uniqueified.items()
    }


def get_case(
*,
data: Any,
suite_name: str,
test_name: str,
) -> Optional[Dict[str, Any]]:
suite = data.get('suites', {}).get(suite_name)
if suite:
testcase_times = {
case['name']: case
for case in suite['cases']
}
return testcase_times.get(test_name)
    return None


def case_status(case: Dict[str, Any]) -> Optional[str]:
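    # Return the first non-success flag set on this case; None means it passed.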
    for k in ('errored', 'failed', 'skipped'):
if case[k]:
return k
    return None


def make_column(
*,
data: Any,
suite_name: str,
test_name: str,
digits: int,
) -> str:
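    # Fixed column width: `digits` integer digits, a decimal point, 3 decimals,
    # plus one char for the trailing 's' unit (or a right-justified status word).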
decimals = 3
num_length = digits + 1 + decimals
case = get_case(data=data, suite_name=suite_name, test_name=test_name)
if case:
status = case_status(case)
if status:
return f'{status.rjust(num_length)} '
else:
return f'{case["seconds"]:{num_length}.{decimals}f}s'
    return ' ' * (num_length + 1)


def make_columns(
*,
bucket: Any,
sha: str,
jobs: List[str],
suite_name: str,
test_name: str,
digits: int,
) -> str:
jsons = get_ossci_jsons(bucket=bucket, sha=sha, jobs=jobs)
return ' '.join(
make_column(
data=jsons.get(job, {}),
suite_name=suite_name,
test_name=test_name,
digits=digits,
)
for job in jobs
    )


def make_lines(
*,
bucket: Any,
sha: str,
jobs: Optional[List[str]],
suite_name: str,
test_name: str,
) -> List[str]:
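    # One line per job that ran this test at this commit: "<job> <seconds> [status]".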
jsons = get_ossci_jsons(bucket=bucket, sha=sha, jobs=jobs)
lines = []
for job, data in jsons.items():
case = get_case(data=data, suite_name=suite_name, test_name=test_name)
if case:
status = case_status(case)
lines.append(f'{job} {case["seconds"]} {status or ""}')
    return lines


def display_history(
*,
bucket: Any,
commits: List[Tuple[str, datetime]],
jobs: Optional[List[str]],
suite_name: str,
test_name: str,
delta: int,
mode: str,
digits: int,
) -> None:
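    # Commits arrive newest-first; skip any commit less than `delta` hours
    # after the previously printed one.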
prev_time = datetime.now()
for sha, time in commits:
if (prev_time - time).total_seconds() < delta * 3600:
continue
prev_time = time
lines: List[str]
if mode == 'columns':
assert jobs is not None
lines = [make_columns(
bucket=bucket,
sha=sha,
jobs=jobs,
suite_name=suite_name,
test_name=test_name,
digits=digits,
)]
else:
assert mode == 'multiline'
lines = make_lines(
bucket=bucket,
sha=sha,
jobs=jobs,
suite_name=suite_name,
test_name=test_name,
)
for line in lines:
print(f"{time} {sha} {line}".rstrip())
if __name__ == "__main__":
parser = argparse.ArgumentParser(
__file__,
description='Display the history of a test.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
'mode',
choices=['columns', 'multiline'],
help='output format',
)
parser.add_argument(
'--pytorch',
help='path to local PyTorch clone',
default='.',
)
parser.add_argument(
'--ref',
help='starting point (most recent Git ref) to display history for',
default='master',
)
parser.add_argument(
'--delta',
type=int,
help='minimum number of hours between rows',
default=12,
)
parser.add_argument(
'--digits',
type=int,
help='(columns) number of digits to display before the decimal point',
default=4,
)
parser.add_argument(
'--all',
action='store_true',
help='(multiline) ignore listed jobs, show all jobs for each commit',
)
parser.add_argument(
'suite',
help='name of the suite containing the test',
)
parser.add_argument(
'test',
help='name of the test',
)
parser.add_argument(
'job',
nargs='*',
help='names of jobs to display columns for, in order',
default=[],
)
args = parser.parse_args()
commits = get_git_commit_history(path=args.pytorch, ref=args.ref)
s3 = boto3.resource("s3")
bucket = s3.Bucket('ossci-metrics')
display_history(
bucket=bucket,
commits=commits,
jobs=None if args.all else args.job,
suite_name=args.suite,
test_name=args.test,
delta=args.delta,
mode=args.mode,
digits=args.digits,
)