Utility for running delta comparisons between two flag configs (#95411)

Signed-off-by: Edward Z. Yang <ezyang@meta.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/95411
Approved by: https://github.com/Chillee
This commit is contained in:
Edward Z. Yang
2023-02-23 13:38:37 -08:00
committed by PyTorch MergeBot
parent 69d62373aa
commit b8151d2ba9
4 changed files with 98 additions and 31 deletions

1
.gitignore vendored
View File

@ -356,3 +356,4 @@ venv/
# Log files
*.log
sweep/

View File

@ -0,0 +1,49 @@
# This script takes csvs produced by parse_logs.py and combines them
# into a single CSV file
import ast
import csv
import sys
from collections import defaultdict
# Numeric columns for which a dynamic-minus-static delta is reported.
FIELDS = ["frame_time", "graph_breaks"]


def _load_results(static_path, dynamic_path):
    """Index the rows of both CSVs by ``(bench, name)``.

    Returns a dict mapping ``(bench, name)`` to a dict that holds the row
    under the key ``"static"`` and/or ``"dynamic"``, depending on which of
    the two files contained that benchmark.
    """
    results = defaultdict(dict)
    for side, path in zip(["static", "dynamic"], [static_path, dynamic_path]):
        # newline="" is what the csv module asks for, so that quoted fields
        # containing embedded newlines are read back intact.
        with open(path, "r", newline="") as handle:
            for row in csv.DictReader(handle):
                results[(row["bench"], row["name"])][side] = row
    return results


def _parse_metric(text):
    """Parse one CSV cell as a Python literal; return None if it is not one."""
    try:
        return ast.literal_eval(text)
    except (SyntaxError, ValueError):
        # Empty cells raise SyntaxError; non-literal text (e.g. "nan") or a
        # missing cell raises ValueError.  Either way there is nothing to diff.
        return None


def main(argv=None, stream=None):
    """Write a CSV of per-benchmark deltas (dynamic - static).

    Args:
        argv: ``[static_csv, dynamic_csv]``; defaults to ``sys.argv[1:]``.
        stream: text stream that receives the CSV; defaults to ``sys.stdout``.

    Raises:
        SystemExit: if exactly two input paths are not supplied.
    """
    if argv is None:
        argv = sys.argv[1:]
    if stream is None:
        stream = sys.stdout
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if len(argv) != 2:
        sys.exit("usage: combine_csv.py STATIC_CSV DYNAMIC_CSV")

    results = _load_results(argv[0], argv[1])

    out = csv.DictWriter(
        stream,
        ["bench", "name"]
        + [f"delta_{n}" for n in FIELDS]
        + ["static_url", "dynamic_url"],
        dialect="excel",
    )
    out.writeheader()

    for (bench, name), sides in results.items():
        # Only benchmarks present in both runs can be compared.
        if "static" not in sides or "dynamic" not in sides:
            continue

        if not name:
            # A row with no benchmark name carries only the "explain" URL of
            # each run; surface both URLs side by side.
            out.writerow(
                {
                    "static_url": sides["static"]["explain"],
                    "dynamic_url": sides["dynamic"]["explain"],
                }
            )
            continue

        row = {"bench": bench, "name": name}
        for field in FIELDS:
            static = _parse_metric(sides["static"][field])
            dynamic = _parse_metric(sides["dynamic"][field])
            if static is None or dynamic is None:
                # Leave the delta cell blank when either side has no value.
                continue
            row[f"delta_{field}"] = dynamic - static
        out.writerow(row)


if __name__ == "__main__":
    main()

View File

@ -1,7 +1,6 @@
import csv import csv
import os import os
import re import re
import subprocess
import sys import sys
# This script takes the logs produced by the benchmark scripts (e.g., # This script takes the logs produced by the benchmark scripts (e.g.,
@ -24,11 +23,6 @@ m = re.search(r"https://gist.github.com/[a-f0-9]+", full_log)
if m is not None: if m is not None:
gist_url = m.group(0) gist_url = m.group(0)
# Record the current commit hash for ease of reproducibility
hash = subprocess.check_output(
"git rev-parse HEAD".split(" "), encoding="utf-8"
).rstrip()
# Split the log into an entry per benchmark # Split the log into an entry per benchmark
entries = re.split( entries = re.split(
r"(?:cuda (?:train|eval) +([^ ]+)|WARNING:root:([^ ]+) failed to load)", full_log r"(?:cuda (?:train|eval) +([^ ]+)|WARNING:root:([^ ]+) failed to load)", full_log
@ -45,24 +39,26 @@ def chunker(seq, size):
c = 0 c = 0
i = 0 i = 0
out = csv.writer(sys.stdout, dialect="excel") out = csv.DictWriter(
out.writerow( sys.stdout,
[ [
"", "bench",
hash, "name",
"", "result",
"", "component",
"", "context",
"", "explain",
gist_url,
"frame_time", "frame_time",
"backend_time", "backend_time",
"graph_count", "graph_count",
"op_count", "op_count",
"graph_breaks", "graph_breaks",
"unique_graph_breaks", "unique_graph_breaks",
] ],
dialect="excel",
) )
out.writeheader()
out.writerow({"explain": gist_url})
# Sometimes backtraces will be in third party code, which results # Sometimes backtraces will be in third party code, which results
# in very long file names. Delete the absolute path in this case. # in very long file names. Delete the absolute path in this case.
@ -179,21 +175,20 @@ for name, name2, log in chunker(entries, 3):
context = "" context = ""
out.writerow( out.writerow(
[ {
bench, "bench": bench,
name, "name": name,
"", "result": r,
r, "component": component,
component, "context": context,
context, "explain": explain,
explain, "frame_time": frame_time,
frame_time, "backend_time": backend_time,
backend_time, "graph_count": graph_count,
graph_count, "op_count": op_count,
op_count, "graph_breaks": graph_breaks,
graph_breaks, "unique_graph_breaks": unique_graph_breaks,
unique_graph_breaks, }
]
) )
i += 1 i += 1

22
benchmarks/dynamo/run_delta.sh Executable file
View File

@ -0,0 +1,22 @@
#!/bin/bash
# Runs run_all.sh twice -- once as given and once with --dynamic-shapes
# appended -- then combines the two final.csv files into delta.csv and
# uploads it as a gist.  All arguments are forwarded to run_all.sh.
set -x

# Some QoL for people running this script on Meta servers
if getent hosts fwdproxy; then
    export https_proxy=http://fwdproxy:8080 http_proxy=http://fwdproxy:8080 no_proxy=.fbcdn.net,.facebook.com,.thefacebook.com,.tfbnw.net,.fb.com,.fburl.com,.facebook.net,.sb.fbsbx.com,localhost
fi

# Results are written under the directory the script was invoked from.
WORK="$PWD"

# Move two levels up from this script's directory (the repository root).
# BASH_SOURCE is an array, so index it explicitly; bail out if the cd fails,
# otherwise ROOT would silently point at the wrong directory.
cd "$(dirname "${BASH_SOURCE[0]}")"/../.. || exit 1
ROOT="$PWD"

mkdir -p "$WORK/sweep/static"
mkdir -p "$WORK/sweep/dynamic"

# Each run happens in its own subshell/directory so their outputs stay apart.
(cd "$WORK/sweep/static" && "$ROOT/benchmarks/dynamo/run_all.sh" "$@")
(cd "$WORK/sweep/dynamic" && "$ROOT/benchmarks/dynamo/run_all.sh" "$@" --dynamic-shapes)

# Relative path works because the current directory is $ROOT.
python benchmarks/dynamo/combine_csv.py "$WORK/sweep/static/final.csv" "$WORK/sweep/dynamic/final.csv" > "$WORK/delta.csv"
gh gist create "$WORK/delta.csv"