Compare commits

...

3 Commits

SHA1 Message Date
7638aa48e7 skip hf models 2025-10-07 10:25:03 -07:00
09db7828a2 lint 2025-10-07 10:22:37 -07:00
6bf2dab712 cleanup torchbench models 2025-10-07 10:22:09 -07:00
46 changed files with 18 additions and 1999 deletions

View File

@@ -256,7 +256,7 @@ test_torchbench_smoketest() {
   local device=mps
   local dtypes=(undefined float16 bfloat16 notset)
   local dtype=${dtypes[$1]}
-  local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
+  local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
   for backend in eager inductor; do
@@ -319,7 +319,7 @@ test_aoti_torchbench_smoketest() {
   local device=mps
   local dtypes=(undefined float16 bfloat16 notset)
   local dtype=${dtypes[$1]}
-  local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
+  local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
   echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}"
   local dtype_arg="--${dtype}"

View File

@@ -838,7 +838,7 @@ test_dynamo_benchmark() {
     elif [[ "${suite}" == "timm_models" ]]; then
       export TORCHBENCH_ONLY_MODELS="inception_v3"
     elif [[ "${suite}" == "torchbench" ]]; then
-      export TORCHBENCH_ONLY_MODELS="hf_Bert"
+      export TORCHBENCH_ONLY_MODELS="BERT_pytorch"
     fi
   fi
   test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@"
@@ -869,13 +869,13 @@ test_inductor_torchbench_smoketest_perf() {
   mkdir -p "$TEST_REPORTS_DIR"
   python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
-    --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
+    --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only BERT_pytorch \
     --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
   # The threshold value needs to be actively maintained to make this check useful
   python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4
   # Check memory compression ratio for a few models
-  for test in hf_Albert timm_vision_transformer; do
+  for test in timm_vision_transformer; do
     python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
       --disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \
       --only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv"

View File

@@ -25,15 +25,6 @@ drq
 fambench_dlrm
 fambench_xlmr
 fastNLP_Bert
-hf_Albert
-hf_Bart
-hf_Bert
-hf_BigBird
-hf_DistilBert
-hf_GPT2
-hf_Longformer
-hf_Reformer
-hf_T5
 maml
 maml_omniglot
 mnasnet1_0

View File

@@ -23,7 +23,6 @@ TORCHBENCH_MODELS: list[str] = [
     "resnet50",
     "moco",
     "llama",
-    "hf_T5",
 ]
 HUGGINGFACE_MODELS: list[str] = [
     "AllenaiLongformerBase",

View File

@@ -45,13 +45,6 @@ def check_accuracy(actual_csv, expected_csv, expected_filename):
     "doctr_reco_predictor",
     "dpn107",
     "fbnetv3_b",
-    "hf_BigBird",
-    "hf_Longformer",
-    "hf_Reformer",
-    "hf_Roberta_base",
-    "hf_T5",
-    "hf_T5_base",
-    "hf_T5_generate",
     "levit_128",
     "llava",
     "microbench_unbacked_tolist_sum",

View File

@@ -38,12 +38,6 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename):
     "detectron2_fcos_r_50_fpn",
     "doctr_det_predictor",
     "doctr_reco_predictor",
-    "hf_BigBird",
-    "hf_Longformer",
-    "hf_Reformer",
-    "hf_Roberta_base",
-    "hf_T5",
-    "hf_T5_base",
     "levit_128",
     "llava",
     "microbench_unbacked_tolist_sum",

View File

@@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,6
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,20
-hf_Roberta_base,pass,6
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -118,62 +118,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,fail_accuracy,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -114,58 +114,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -114,58 +114,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,27
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,27
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,27
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,6
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,20
-hf_Roberta_base,pass,6
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -98,58 +98,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -98,58 +98,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -106,66 +106,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,27
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,25
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,8
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_large,pass_due_to_skip,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,fail_accuracy,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,6
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,20
-hf_Roberta_base,pass,6
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,6
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,20
-hf_Roberta_base,pass,6
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,fail_accuracy,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,6
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,20
-hf_Roberta_base,pass,6
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,9
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,8
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,6
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,25
-hf_Roberta_base,pass,6
-hf_T5,pass,0
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -118,62 +118,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,fail_accuracy,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,9
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,8
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,fail_to_run,3
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,25
-hf_Roberta_base,pass,6
-hf_T5,pass,0
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -130,74 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,fail_to_run,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,5
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,fail_to_run,3
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,10
-hf_Reformer,pass,20
-hf_Roberta_base,pass,6
-hf_T5,pass,5
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,pass,9
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,8
-hf_Roberta_base,pass,0
-hf_T5,pass,0
-hf_T5_base,pass,0
-hf_T5_generate,pass,7
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,15
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Longformer,pass,4
-hf_Reformer,pass,25
-hf_Roberta_base,pass,6
-hf_T5,pass,0
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -130,66 +130,6 @@ functorch_maml_omniglot,pass,0
-hf_Albert,pass,0
-hf_Bart,pass,0
-hf_Bert,pass,0
-hf_Bert_large,pass,0
-hf_BigBird,fail_accuracy,0
-hf_DistilBert,pass,0
-hf_GPT2,pass,0
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,8
-hf_T5,pass,0
-hf_T5_base,eager_fail_to_run,0
-hf_T5_generate,pass,11
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,0
-hf_distil_whisper,pass,0
 lennard_jones,pass,0


View File

@@ -78,58 +78,6 @@ functorch_maml_omniglot,pass,7
-hf_Albert,pass,6
-hf_Bart,pass,6
-hf_Bert,pass,6
-hf_Bert_large,pass,6
-hf_BigBird,pass,6
-hf_DistilBert,pass,6
-hf_GPT2,pass,8
-hf_GPT2_large,pass_due_to_skip,0
-hf_Reformer,pass,25
-hf_T5_base,eager_2nd_run_OOM,0
-hf_T5_large,pass_due_to_skip,0
-hf_Whisper,pass,6
-hf_distil_whisper,model_fail_to_load,0
 lennard_jones,pass,7


View File

@@ -155,7 +155,6 @@ CI_SKIP_DYNAMIC_BATCH_ONLY = {
     "detectron2_fasterrcnn_r_50_c4",
     "detectron2_fasterrcnn_r_50_dc5",
     "detectron2_fasterrcnn_r_50_fpn",
-    "hf_T5_generate",
     "Reformer",
     "llama",
 }.union(INTERNAL_CI_SKIP_DYNAMIC_BATCH_ONLY)
@@ -188,7 +187,6 @@ BENCHMARK_USE_SGD = {
     "timm_vision_transformer",
     "timm_vovnet",
     "vgg16",
-    "hf_T5",  # Fails dynamic https://github.com/pytorch/pytorch/issues/115968
     # HF
     "AlbertForMaskedLM",
     "BartForCausalLM",
@@ -245,8 +243,6 @@ CI_USE_SGD = {
     "detectron2_maskrcnn_r_101_fpn",
     "detectron2_maskrcnn_r_50_c4",
     "detectron2_maskrcnn_r_50_fpn",
-    "hf_T5_base",
-    "hf_clip",
     "llama_v2_7b_16h",
     "mobilenet_v2_quantized_qat",
     "phi_1_5 resnet50_quantized_qat",
@@ -2068,8 +2064,6 @@ class BenchmarkRunner:
         from diffusers.models.transformer_2d import Transformer2DModel
         from torchbenchmark.models.nanogpt.model import Block
         from transformers.models.llama.modeling_llama import LlamaDecoderLayer
-        from transformers.models.t5.modeling_t5 import T5Block
-        from transformers.models.whisper.modeling_whisper import WhisperEncoderLayer
         from torch.distributed.fsdp.wrap import (
             ModuleWrapPolicy,
@@ -2079,10 +2073,6 @@ class BenchmarkRunner:
         # handcrafted wrap policy
         MODEL_FSDP_WRAP = {
             "stable_diffusion_unet": (Transformer2DModel,),
-            "hf_T5": (T5Block,),
-            "hf_T5_base": (T5Block,),
-            "hf_T5_large": (T5Block,),
-            "hf_Whisper": (WhisperEncoderLayer,),
             "llama_v2_7b_16h": (LlamaDecoderLayer,),
             "nanogpt": (Block,),
         }
@@ -3863,8 +3853,6 @@ def run(runner, args, original_dir=None):
             "timm_efficientdet",
         }
     )
-    if args.training:
-        runner.skip_models.add("hf_T5")
     if args.nnc:
         torch._C._jit_override_can_fuse_on_cpu(True)

View File

@@ -21,9 +21,6 @@ try:
 except ImportError:
     from torchbench import setup_torchbench_cwd
 from transformers.models.bert.modeling_bert import BertLayer, BertLMPredictionHead
-from transformers.models.t5.modeling_t5 import T5Block
 def setup(rank, world_size):
     os.environ["MASTER_ADDR"] = os.getenv("MASTER_ADDR", "localhost")
@@ -128,8 +125,6 @@ def fsdp_checkpointing_base(model, blocks):
 MODEL_FSDP_WRAP = {
     "toy_model": (MyModule,),
     "hf_Bert": (BertLayer, BertLMPredictionHead),
-    "hf_T5": (T5Block,),
 }

View File

@@ -158,7 +158,7 @@ if __name__ == "__main__":
     model_arg.add_argument(
         "--torchbench-model",
         "--torchbench_model",
-        help="name of torchbench model, e.g. hf_Bert",
+        help="name of torchbench model, e.g. BERT_pytorch",
     )
     model_arg.add_argument(
         "--toy-model", "--toy_model", action="store_true", help="use toy model instead"

View File

@@ -12,17 +12,6 @@ cuda,dlrm,1024,1.3421,3.2177,4.9493,1.0009
 cuda,drq,1,1.0820,3.8157,8.0732,0.9687
 cuda,fastNLP_Bert,6,1.4839,37.9050,32.7583,1.1563
 cuda,functorch_dp_cifar10,64,1.5014,6.9596,14.1516,0.4432
-cuda,hf_Albert,8,2.2452,30.6134,25.9036,1.3098
-cuda,hf_Bart,4,1.7012,34.3999,37.9975,1.0128
-cuda,hf_Bert,4,1.9003,23.3435,34.8196,1.0273
-cuda,hf_Bert_large,4,1.6346,52.8525,62.3112,1.0726
-cuda,hf_BigBird,2,1.9208,105.2672,101.4787,1.1415
-cuda,hf_DistilBert,8,1.3988,22.5793,20.2386,1.0232
-cuda,hf_GPT2,4,1.8075,27.5184,25.3428,1.1562
-cuda,hf_GPT2_large,4,1.7716,118.7404,68.1618,1.1725
-cuda,hf_Reformer,4,1.1744,70.4228,15.1152,0.9266
-cuda,hf_T5,8,1.8778,93.3134,37.0046,1.2279
-cuda,hf_T5_large,2,2.3623,101.5518,143.7982,1.1674
 cuda,lennard_jones,1000,1.0649,1.5233,4.1119,0.9998
 cuda,mnasnet1_0,32,1.1957,19.1993,27.2302,0.7758
 cuda,mobilenet_v2,96,1.4876,32.3311,27.4719,1.1729


View File

@@ -8,22 +8,14 @@ resnet50,inductor,float32,dynamic,default,1.67742767
 mobilenet_v3_large,inductor,float32,static,cpp,2.63311706
 timm_resnest,inductor,float32,dynamic,cpp,1.7321529
 functorch_maml_omniglot,inductor,float32,dynamic,cpp,1.126799
-#hf_GPT2,inductor,float32,dynamic,cpp,
 yolov3,export-aot-inductor,float32,static,default,1.40687424
 mobilenet_v2,export-aot-inductor,float32,static,default,2.90375357
 resnext50_32x4d,export-aot-inductor,float32,dynamic,default,1.49299689
-hf_Albert,export-aot-inductor,float32,dynamic,default,1.261471
 resnext50_32x4d,inductor,amp,static,default,1.47023111
 vgg16,inductor,amp,static,default,1.2692454
-hf_Longformer,inductor,amp,dynamic,default,1.22015225
-hf_Bert_large,inductor,amp,dynamic,default,1.18572179
 llama,inductor,amp,static,default,1.33157028
 timm_regnet,inductor,amp,static,cpp,1.12734073
 mnasnet1_0,inductor,amp,static,cpp,2.1296814
-#hf_T5_generate,inductor,amp,dynamic,cpp,
 timm_vovnet,inductor,amp,dynamic,cpp,1.10851009
 #mobilenet_v2,inductor,amp,dynamic,cpp,2.27774577 # https://github.com/pytorch/pytorch/issues/131693
-hf_GPT2,export-aot-inductor,amp,static,default,1.4432794
 densenet121,export-aot-inductor,amp,static,default,1.25591385
-hf_DistilBert,export-aot-inductor,amp,dynamic,default,1.2926442
-hf_Bart,export-aot-inductor,amp,dynamic,default,1.19515416


View File

@@ -75,29 +75,7 @@ def setup_torchbench_cwd():
     return original_dir
-def process_hf_reformer_output(out):
-    assert isinstance(out, list)
-    # second output is unstable
-    return [elem for i, elem in enumerate(out) if i != 1]
-def process_hf_whisper_output(out):
-    out_ret = []
-    for i, elem in enumerate(out):
-        if i == 0:
-            if elem is not None:
-                assert isinstance(elem, dict)
-                out_ret.append({k: v for k, v in elem.items() if k != "logits"})
-        elif i != 1:
-            out_ret.append(elem)
-    return out_ret
-process_train_model_output = {
-    "hf_Reformer": process_hf_reformer_output,
-    "hf_Whisper": process_hf_whisper_output,
-}
+process_train_model_output = {}
 class TorchBenchmarkRunner(BenchmarkRunner):
@@ -227,12 +205,10 @@ class TorchBenchmarkRunner(BenchmarkRunner):
         "drq",
         "hf_Reformer",
         "DALLE2_pytorch",
-        "hf_BigBird",
         "detectron2_maskrcnn_r_50_fpn",
         "detectron2_maskrcnn_r_101_fpn",
         "vision_maskrcnn",
         "doctr_reco_predictor",
-        "hf_T5_generate",
     }
     def load_model(
@@ -395,8 +371,6 @@ class TorchBenchmarkRunner(BenchmarkRunner):
             and hasattr(model.config, "use_cache")
         ):
             model.config.use_cache = False
-        if model_name == "hf_T5_generate":
-            model.model.config.use_cache = False
         self.validate_model(model, example_inputs)
         return device, benchmark.name, model, example_inputs, batch_size

View File

@@ -5,8 +5,6 @@ batch_size:
   demucs: 4
   dlrm: 1024
   densenet121: 4
-  hf_Reformer: 4
-  hf_T5_base: 4
   timm_efficientdet: 1
   llama_v2_7b_16h: 1
   # reduced from 16 due to cudagraphs OOM in TorchInductor dashboard
@@ -30,7 +28,6 @@ tolerance:
   - alexnet
   - attention_is_all_you_need_pytorch
   - densenet121
-  - hf_Albert
   - vgg16
   - mobilenet_v3_large
   - nvidia_deeprecommender
@@ -47,13 +44,11 @@ tolerance:
   higher_fp16:
     - doctr_reco_predictor
     - drq
-    - hf_Whisper
     - phlippe_resnet
   higher_bf16:
     - doctr_reco_predictor
     - drq
-    - hf_Whisper
   # These models need higher tolerance for xpu devices with bf16
   higher_bf16_xpu:
@@ -75,12 +70,6 @@ require_larger_multiplier_for_smaller_tensor:
 # These benchmarks took >600s on an i9-11900K CPU
 very_slow: &VERY_SLOW_MODELS
-  # 3339s
-  - hf_BigBird
-  # 3062s
-  - hf_Longformer
-  # 930s
-  - hf_T5
 # These benchmarks took >60s on an i9-11900K CPU
@@ -92,18 +81,6 @@ slow:
   - demucs
   # 242s
   - fastNLP_Bert
-  # 221s
-  - hf_Albert
-  # 400s
-  - hf_Bart
-  # 334s
-  - hf_Bert
-  # 187s
-  - hf_DistilBert
-  # 470s
-  - hf_GPT2
-  # 141s
-  - hf_Reformer
   # 317s
   - speech_transformer
   # 99s
@@ -187,11 +164,20 @@ skip:
   - hf_clip
   # multi gpu not always available in benchmark runners
   - simple_gpt_tp_manual
+  # skip hf and timm models in torchbench since
+  # there are already separate benchmarks for them
+  - hf_Albert
+  - hf_Bart
+  - hf_Bert
+  - hf_BigBird
+  - hf_DistilBert
+  - hf_GPT2
+  - hf_Longformer
+  - hf_Reformer
+  - hf_T5
   device:
     cpu:
-      # OOMs
-      - hf_T5_generate
       # model is CUDA only
       - cm3leon_generate
       # timeout
@@ -208,7 +194,6 @@ skip:
       - torchrec_dlrm
       - simple_gpt
       # works on cuda, accuracy failure on cpu
-      - hf_Whisper
      - stable_diffusion_text_encoder
      - llava
      - moco
@@ -235,7 +220,6 @@ skip:
      - sam_fast
      # Model's DEFAULT_TRAIN_BSIZE is not implemented
      - cm3leon_generate
-      - hf_T5_generate
      - doctr_det_predictor
      - doctr_reco_predictor
      - moondream
@@ -247,9 +231,6 @@ skip:
      - cm3leon_generate
      - detectron2_fcos_r_50_fpn
      - fastNLP_Bert
-      - hf_Longformer
-      - hf_Reformer
-      - hf_T5_generate
      - opacus_cifar10
      - speech_transformer
@@ -286,8 +267,6 @@ accuracy:
   # Models too large to have eager, dynamo and fp64_numbers simultaneosuly
   # even for 40 GB machine. We have tested accuracy for smaller version of
   # these models
-  - hf_GPT2_large
-  - hf_T5_large
   - timm_vision_transformer_large
   # accuracy https://github.com/pytorch/pytorch/issues/93847
   - maml
@@ -300,5 +279,4 @@ accuracy:
   - pytorch_unet
 max_batch_size:
-  hf_GPT2: 2
   pytorch_unet: 2

View File

@@ -4,11 +4,6 @@ LearningToPaint,1024
 alexnet,1024
 dcgan,1024
 densenet121,64
-hf_Albert,32
-hf_Bart,16
-hf_Bert,16
-hf_GPT2,16
-hf_T5,4
 mnasnet1_0,256
 mobilenet_v2,128
 mobilenet_v3_large,256

View File

@@ -6,18 +6,6 @@ densenet121,512
 dlrm,2048
 fastNLP_Bert,8
 functorch_dp_cifar10,1024
-hf_Albert,8
-hf_Bart,8
-hf_Bert,8
-hf_Bert_large,8
-hf_DistilBert,8
-hf_GPT2,8
-hf_GPT2_large,1
-hf_Longformer,4
-hf_Reformer,8
-hf_T5,4
-hf_T5_base,1
-hf_T5_large,1
 LearningToPaint,96
 lennard_jones,1024
 mnasnet1_0,32