Utility for running delta comparisons between two flag configs (#95411)

Signed-off-by: Edward Z. Yang <ezyang@meta.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/95411
Approved by: https://github.com/Chillee
This commit is contained in:
Edward Z. Yang
2023-02-23 13:38:37 -08:00
committed by PyTorch MergeBot
parent 69d62373aa
commit b8151d2ba9
4 changed files with 98 additions and 31 deletions

1
.gitignore vendored
View File

@ -356,3 +356,4 @@ venv/
# Log files
*.log
sweep/

View File

@ -0,0 +1,49 @@
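# This script takes the two CSVs produced by parse_logs.py (the static run
# first, the dynamic run second) and combines them into a single CSV of
# per-benchmark deltas (dynamic - static), which is written to stdout.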
import ast
import csv
import sys
from collections import defaultdict
# Metrics for which a "delta_<metric>" column (dynamic - static) is emitted.
FIELDS = ["frame_time", "graph_breaks"]


def load_results(static_path, dynamic_path):
    """Read both CSVs and group their rows by ``(bench, name)``.

    Args:
        static_path: CSV produced from the static run.
        dynamic_path: CSV produced from the dynamic run.

    Returns:
        A mapping ``{(bench, name): {"static": row, "dynamic": row}}``; a side
        is absent when the corresponding file has no row for that key.  If a
        key occurs more than once in a file, the last row wins.
    """
    results = defaultdict(dict)
    for side, path in zip(["static", "dynamic"], [static_path, dynamic_path]):
        # newline="" lets the csv module do its own newline handling, which
        # is required for quoted fields containing line breaks.
        with open(path, newline="") as handle:
            for row in csv.DictReader(handle):
                results[(row["bench"], row["name"])][side] = row
    return results


def write_deltas(results, out_file, fields=FIELDS):
    """Write one delta row per benchmark present on both sides.

    Args:
        results: Mapping as returned by :func:`load_results`.
        out_file: Writable text stream receiving the CSV (excel dialect).
        fields: Metric columns to diff; each yields a ``delta_<field>`` column.
    """
    out = csv.DictWriter(
        out_file,
        ["bench", "name"]
        + [f"delta_{n}" for n in fields]
        + ["static_url", "dynamic_url"],
        dialect="excel",
    )
    out.writeheader()
    for (bench, name), sides in results.items():
        # Only entries that exist in both runs can be compared.
        if "static" not in sides or "dynamic" not in sides:
            continue
        if not name:
            # A row without a name carries no metrics; only its "explain"
            # values are forwarded, as the per-side URL columns.
            out.writerow(
                {
                    "static_url": sides["static"]["explain"],
                    "dynamic_url": sides["dynamic"]["explain"],
                }
            )
            continue
        row = {"bench": bench, "name": name}
        for field in fields:
            try:
                static = ast.literal_eval(sides["static"][field])
                dynamic = ast.literal_eval(sides["dynamic"][field])
            except (SyntaxError, ValueError):
                # Empty cells raise SyntaxError; cells that are not Python
                # literals (e.g. "nan") or are missing raise ValueError.
                # Either way there is nothing to diff: leave the cell blank.
                continue
            row[f"delta_{field}"] = dynamic - static
        out.writerow(row)


def main(argv):
    """Entry point: ``combine_csv.py STATIC_CSV DYNAMIC_CSV`` -> CSV on stdout."""
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(argv) != 3:
        sys.exit(f"usage: {argv[0]} STATIC_CSV DYNAMIC_CSV")
    write_deltas(load_results(argv[1], argv[2]), sys.stdout)


if __name__ == "__main__":
    main(sys.argv)

View File

@ -1,7 +1,6 @@
import csv
import os
import re
import subprocess
import sys
# This script takes the logs produced by the benchmark scripts (e.g.,
@ -24,11 +23,6 @@ m = re.search(r"https://gist.github.com/[a-f0-9]+", full_log)
if m is not None:
gist_url = m.group(0)
# Record the current commit hash for ease of reproducibility
hash = subprocess.check_output(
"git rev-parse HEAD".split(" "), encoding="utf-8"
).rstrip()
# Split the log into an entry per benchmark
entries = re.split(
r"(?:cuda (?:train|eval) +([^ ]+)|WARNING:root:([^ ]+) failed to load)", full_log
@ -45,24 +39,26 @@ def chunker(seq, size):
c = 0
i = 0
out = csv.writer(sys.stdout, dialect="excel")
out.writerow(
out = csv.DictWriter(
sys.stdout,
[
"",
hash,
"",
"",
"",
"",
gist_url,
"bench",
"name",
"result",
"component",
"context",
"explain",
"frame_time",
"backend_time",
"graph_count",
"op_count",
"graph_breaks",
"unique_graph_breaks",
]
],
dialect="excel",
)
out.writeheader()
out.writerow({"explain": gist_url})
# Sometimes backtraces will be in third party code, which results
# in very long file names. Delete the absolute path in this case.
@ -179,21 +175,20 @@ for name, name2, log in chunker(entries, 3):
context = ""
out.writerow(
[
bench,
name,
"",
r,
component,
context,
explain,
frame_time,
backend_time,
graph_count,
op_count,
graph_breaks,
unique_graph_breaks,
]
{
"bench": bench,
"name": name,
"result": r,
"component": component,
"context": context,
"explain": explain,
"frame_time": frame_time,
"backend_time": backend_time,
"graph_count": graph_count,
"op_count": op_count,
"graph_breaks": graph_breaks,
"unique_graph_breaks": unique_graph_breaks,
}
)
i += 1

22
benchmarks/dynamo/run_delta.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash
# Runs the benchmark sweep twice -- once as-is ("static") and once with
# --dynamic-shapes appended ("dynamic") -- then combines the two final.csv
# files into delta.csv and uploads it as a gist.  Any arguments given to this
# script are forwarded to both run_all.sh invocations.
#
# All outputs (sweep/ and delta.csv) are placed in the directory the script
# was invoked from.

# Echo each command as it runs.
set -x
# Some QoL for people running this script on Meta servers
if getent hosts fwdproxy; then
export https_proxy=http://fwdproxy:8080 http_proxy=http://fwdproxy:8080 no_proxy=.fbcdn.net,.facebook.com,.thefacebook.com,.tfbnw.net,.fb.com,.fburl.com,.facebook.net,.sb.fbsbx.com,localhost
fi
# Remember where we were invoked from; results are written there.
WORK="$PWD"
# Move two levels up from this script's directory.  Abort if that fails:
# otherwise ROOT would silently point at the wrong directory and every path
# derived from it below would be wrong.
cd "$(dirname "${BASH_SOURCE[0]}")"/../.. || exit 1
ROOT="$PWD"
mkdir -p "$WORK/sweep/static"
mkdir -p "$WORK/sweep/dynamic"
# Each sweep runs in a subshell so its `cd` does not leak into this shell.
(cd "$WORK/sweep/static" && "$ROOT/benchmarks/dynamo/run_all.sh" "$@")
(cd "$WORK/sweep/dynamic" && "$ROOT/benchmarks/dynamo/run_all.sh" "$@" --dynamic-shapes)
# Argument order matters: static CSV first, dynamic CSV second.
python benchmarks/dynamo/combine_csv.py "$WORK/sweep/static/final.csv" "$WORK/sweep/dynamic/final.csv" > "$WORK/delta.csv"
gh gist create "$WORK/delta.csv"