Files
pytorch/benchmarks/dynamo/check_graph_breaks.py
LifengWang 838924436e update the baseline for nightly max_autotune tests (#154973)
Hi @desertfire, according to the latest test [results](https://github.com/pytorch/pytorch/actions/runs/15385952839) from the inductor nightly for max_autotune tests, we plan to update the baseline data:

In the latest nightly test, two models require baseline updates:

- vision_maskrcnn: This model shows improved graph breaks, so I’ve updated the baseline accordingly.
- detectron2_fcos_r_50_fpn: This model has a different number of graph breaks. However, since its accuracy result still shows fail_accuracy, I skipped the graph break check for this model.

```
vision_maskrcnn                     IMPROVED:           graph_breaks=29, expected=30
Improvement: 1 models have fixed dynamo graph breaks:
    vision_maskrcnn
```

```
detectron2_fcos_r_50_fpn            XFAIL
detectron2_fcos_r_50_fpn            FAIL:               graph_breaks=24, expected=22
Error: 1 models have new dynamo graph breaks:
    detectron2_fcos_r_50_fpn
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/154973
Approved by: https://github.com/desertfire
2025-07-31 11:38:55 +00:00

136 lines
3.9 KiB
Python

import argparse
import os
import sys
import textwrap
import pandas as pd
# Poor man's DISABLED_TEST: models listed here have unstable graph-break
# counts, so they are looked up by name and reported as flaky rather than
# being treated like ordinary models.
flaky_models = {
    "XGLMForCausalLM",  # discovered in https://github.com/pytorch/pytorch/pull/128148
    "detectron2_fcos_r_50_fpn",
    "detectron2_maskrcnn_r_101_c4",
    "gluon_inception_v3",
    "yolov3",
}
def get_field(csv, model_name: str, field: str):
    """Return the value of ``field`` for the one row of ``csv`` named ``model_name``.

    Args:
        csv: Table with a ``"name"`` column, indexed the way a pandas
            DataFrame is (``csv["name"]``, ``csv.loc[mask][field]``).
        model_name: Value to match against the ``"name"`` column.
        field: Column whose value is returned.

    Returns:
        The scalar stored in ``field`` for the matching row, or ``None`` when
        the lookup cannot be resolved: a required column is absent, or
        ``model_name`` matches zero rows or more than one row.
    """
    try:
        return csv.loc[csv["name"] == model_name][field].item()
    except (KeyError, ValueError):
        # KeyError: "name" or ``field`` is not a column of ``csv``.
        # ValueError: ``.item()`` requires exactly one matching row.
        # Anything else is a programming error and is allowed to surface.
        return None
# Extra models treated as flaky only when the expected-results filename
# contains "rocm". Kept immutable so it can never leak into `flaky_models`.
_ROCM_FLAKY_MODELS = frozenset(
    {
        "alexnet",
        "cait_m36_384",
        "demucs",
        "densenet121",
        "detectron2_fcos_r_50_fpn",
        "doctr_det_predictor",
        "doctr_reco_predictor",
        "hf_BigBird",
        "hf_Longformer",
        "hf_Reformer",
        "hf_Roberta_base",
        "hf_T5",
        "hf_T5_base",
        "levit_128",
        "llava",
        "microbench_unbacked_tolist_sum",
        "sam",
        "sam_fast",
        "stable_diffusion_text_encoder",
        "stable_diffusion_unet",
        "timm_efficientdet",
        "timm_nfnet",
        "torchrec_dlrm",
        "vgg16",
    }
)


def _is_unavailable(value):
    """Return True when a looked-up count is ``None`` or NaN (not comparable)."""
    return value is None or bool(pd.isna(value))


def check_graph_breaks(actual_csv, expected_csv, expected_filename):
    """Compare each model's graph-break count with its expected baseline.

    One status line is printed per entry of ``actual_csv["name"]``:

    * ``MISSING:`` - no usable baseline value; counted as improved.
    * ``PASS`` / ``PASS_BUT_FLAKY`` - counts are equal.
    * ``FAIL:`` - more graph breaks than expected, or the actual count could
      not be read; counted as failed.
    * ``IMPROVED:`` - fewer graph breaks than expected; counted as improved.
    * ``FAIL_BUT_FLAKY:`` / ``IMPROVED_BUT_FLAKY:`` - same comparisons for a
      model in the flaky set; reported but not counted.

    Args:
        actual_csv: Table of measured results with ``"name"`` and
            ``"graph_breaks"`` columns.
        expected_csv: Baseline table with the same columns.
        expected_filename: Name of the baseline file. It is quoted in the
            returned message, and if it contains ``"rocm"`` the additional
            ROCm flaky list applies for this call.

    Returns:
        A ``(changed, msg)`` tuple. ``changed`` is the list of failed models
        if there are any, otherwise the list of improved models (empty when
        nothing differs). ``msg`` is a human-readable summary, or ``""`` when
        ``changed`` is empty.
    """
    failed = []
    improved = []

    # Use a per-call union rather than updating the module-level set, so a
    # ROCm run does not change the flaky list seen by later calls.
    known_flaky = flaky_models
    if "rocm" in expected_filename:
        known_flaky = flaky_models | _ROCM_FLAKY_MODELS

    for model in actual_csv["name"]:
        graph_breaks = get_field(actual_csv, model, "graph_breaks")
        expected_graph_breaks = get_field(expected_csv, model, "graph_breaks")
        flaky = model in known_flaky

        if _is_unavailable(expected_graph_breaks):
            # No baseline to compare against (absent, ambiguous, or NaN).
            status = "MISSING:"
            improved.append(model)
        elif _is_unavailable(graph_breaks):
            # The measured count is unreadable, so the check cannot pass.
            # Handling it here avoids comparing None/NaN with a number.
            if flaky:
                status = "FAIL_BUT_FLAKY:"
            else:
                status = "FAIL:"
                failed.append(model)
        elif graph_breaks == expected_graph_breaks:
            status = "PASS_BUT_FLAKY" if flaky else "PASS"
            print(f"{model:34} {status}")
            continue
        elif graph_breaks > expected_graph_breaks:
            if flaky:
                status = "FAIL_BUT_FLAKY:"
            else:
                status = "FAIL:"
                failed.append(model)
        else:
            # Only remaining case: graph_breaks < expected_graph_breaks.
            if flaky:
                status = "IMPROVED_BUT_FLAKY:"
            else:
                status = "IMPROVED:"
                improved.append(model)
        print(
            f"{model:34} {status:19} graph_breaks={graph_breaks}, expected={expected_graph_breaks}"
        )

    msg = ""
    if failed or improved:
        if failed:
            msg += textwrap.dedent(
                f"""
                Error: {len(failed)} models have new dynamo graph breaks:
                    {" ".join(failed)}
                """
            )
        if improved:
            msg += textwrap.dedent(
                f"""
                Improvement: {len(improved)} models have fixed dynamo graph breaks:
                    {" ".join(improved)}
                """
            )
        # SHA1 is read from the environment; fall back to a placeholder the
        # reader is expected to substitute by hand.
        sha = os.getenv("SHA1", "{your CI commit sha}")
        msg += textwrap.dedent(
            f"""
            If this change is expected, you can update `{expected_filename}` to reflect the new baseline.
            from pytorch/pytorch root, run
            `python benchmarks/dynamo/ci_expected_accuracy/update_expected.py {sha}`
            and then `git add` the resulting local changes to expected CSVs to your commit.
            """
        )
    return failed or improved, msg
def main():
    """Command-line entry point.

    Reads the CSV files given by the required ``--actual`` and ``--expected``
    options, runs ``check_graph_breaks`` on them, and, when that reports any
    failed or improved model, prints the summary and exits with status 1.
    """
    cli = argparse.ArgumentParser()
    for flag in ("--actual", "--expected"):
        cli.add_argument(flag, type=str, required=True)
    opts = cli.parse_args()

    actual_table = pd.read_csv(opts.actual)
    expected_table = pd.read_csv(opts.expected)
    changed, report = check_graph_breaks(actual_table, expected_table, opts.expected)
    if not changed:
        return

    print(report)
    sys.exit(1)


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()