From 90b4e130d6871bee4e1f15bb8294c1bbbf8f4ba5 Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Thu, 9 Oct 2025 00:31:25 +0000 Subject: [PATCH] [Benchmark] cleanup torchbench models (#164816) Prune models from TorchInductor dashboard to reduce ci cost. This PR prunes torchbench models according to the [doc](https://docs.google.com/document/d/1nLPNNAU-_M9Clx9FMrJ1ycdPxe-xRA54olPnsFzdpoU/edit?tab=t.0), which removes timm and huggingface models from torchbench. Pull Request resolved: https://github.com/pytorch/pytorch/pull/164816 Approved by: https://github.com/anijain2305, https://github.com/seemethere, https://github.com/huydhn, https://github.com/malfet --- .ci/pytorch/macos-test.sh | 4 +- .ci/pytorch/test.sh | 6 +- .../dynamo/all_torchbench_models_list.txt | 16 --- benchmarks/dynamo/cachebench.py | 1 - benchmarks/dynamo/check_accuracy.py | 18 +--- benchmarks/dynamo/check_graph_breaks.py | 8 +- .../aot_eager_torchbench_inference.csv | 88 --------------- .../aot_eager_torchbench_training.csv | 80 -------------- .../aot_inductor_torchbench_inference.csv | 80 -------------- ...ctor_amp_freezing_torchbench_inference.csv | 84 --------------- ...inductor_freezing_torchbench_inference.csv | 84 --------------- ...ctor_amp_freezing_torchbench_inference.csv | 92 ---------------- ...inductor_freezing_torchbench_inference.csv | 92 ---------------- .../cpu_inductor_torchbench_inference.csv | 92 ---------------- ...dynamic_aot_eager_torchbench_inference.csv | 88 --------------- .../dynamic_aot_eager_torchbench_training.csv | 80 -------------- ...ctor_amp_freezing_torchbench_inference.csv | 84 --------------- ...inductor_freezing_torchbench_inference.csv | 84 --------------- ...amic_cpu_inductor_torchbench_inference.csv | 92 ---------------- ...ctor_amp_freezing_torchbench_inference.csv | 92 ---------------- .../dynamic_inductor_torchbench_inference.csv | 88 --------------- .../dynamic_inductor_torchbench_training.csv | 80 -------------- .../dynamo_eager_torchbench_inference.csv | 88 --------------- .../dynamo_eager_torchbench_training.csv | 80 -------------- .../inductor_torchbench_inference.csv | 88 --------------- .../inductor_torchbench_training.csv | 80 -------------- .../rocm/aot_eager_torchbench_inference.csv | 99 ----------------- .../rocm/aot_eager_torchbench_training.csv | 96 ----------------- .../aot_inductor_torchbench_inference.csv | 84 --------------- ...dynamic_aot_eager_torchbench_inference.csv | 99 ----------------- .../dynamic_aot_eager_torchbench_training.csv | 96 ----------------- .../dynamic_inductor_torchbench_inference.csv | 100 ------------------ .../dynamic_inductor_torchbench_training.csv | 96 ----------------- .../dynamo_eager_torchbench_inference.csv | 99 ----------------- .../rocm/dynamo_eager_torchbench_training.csv | 96 ----------------- .../rocm/inductor_torchbench_inference.csv | 84 --------------- .../rocm/inductor_torchbench_training.csv | 76 ------------- benchmarks/dynamo/common.py | 31 ------ benchmarks/dynamo/dist_util.py | 5 - benchmarks/dynamo/distributed.py | 2 +- .../expected_ci_perf_inductor_torchbench.csv | 19 ---- ...ted_ci_speedup_inductor_torchbench_cpu.csv | 13 --- benchmarks/dynamo/torchbench.py | 28 +---- benchmarks/dynamo/torchbench.yaml | 69 +++++------- benchmarks/dynamo/torchbench_models_list.txt | 11 -- .../dynamo/torchbench_models_list_cpu.txt | 19 ---- 46 files changed, 40 insertions(+), 2951 deletions(-) diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 987cf4cf0162..2687852a2c4f 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -256,7 +256,7 @@ test_torchbench_smoketest() { local device=mps local dtypes=(undefined float16 bfloat16 notset) local dtype=${dtypes[$1]} - local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16) + local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16) for backend in eager inductor; do @@ -319,7 +319,7 @@ test_aoti_torchbench_smoketest() { local device=mps local dtypes=(undefined float16 bfloat16 notset) local dtype=${dtypes[$1]} - local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16) + local models=(llama BERT_pytorch dcgan yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor vgg16) echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}" local dtype_arg="--${dtype}" diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index 9ea772ebf265..028e1c04745a 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -838,7 +838,7 @@ test_dynamo_benchmark() { elif [[ "${suite}" == "timm_models" ]]; then export TORCHBENCH_ONLY_MODELS="inception_v3" elif [[ "${suite}" == "torchbench" ]]; then - export TORCHBENCH_ONLY_MODELS="hf_Bert" + export TORCHBENCH_ONLY_MODELS="BERT_pytorch" fi fi test_single_dynamo_benchmark "dashboard" "$suite" "$shard_id" "$@" @@ -869,13 +869,13 @@ test_inductor_torchbench_smoketest_perf() { mkdir -p "$TEST_REPORTS_DIR" python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \ - --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \ + --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only BERT_pytorch \ --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" # The threshold value needs to be actively maintained to make this check useful python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4 # Check memory compression ratio for a few models - for test in hf_Albert timm_vision_transformer; do + for test in BERT_pytorch yolov3; do python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \ --disable-cudagraphs --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" \ --only $test --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$test.csv" diff --git a/benchmarks/dynamo/all_torchbench_models_list.txt b/benchmarks/dynamo/all_torchbench_models_list.txt index 1e896c333288..5205bded7b74 100644 --- a/benchmarks/dynamo/all_torchbench_models_list.txt +++ b/benchmarks/dynamo/all_torchbench_models_list.txt @@ -25,15 +25,6 @@ drq fambench_dlrm fambench_xlmr fastNLP_Bert -hf_Albert -hf_Bart -hf_Bert -hf_BigBird -hf_DistilBert -hf_GPT2 -hf_Longformer -hf_Reformer -hf_T5 maml maml_omniglot mnasnet1_0 @@ -60,13 +51,6 @@ soft_actor_critic speech_transformer squeezenet1_1 tacotron2 -timm_efficientdet -timm_efficientnet -timm_nfnet -timm_regnet -timm_resnest -timm_vision_transformer -timm_vovnet tts_angular vgg16 vision_maskrcnn diff --git a/benchmarks/dynamo/cachebench.py b/benchmarks/dynamo/cachebench.py index c5cbb1eef4d0..9244612b5aeb 100644 --- a/benchmarks/dynamo/cachebench.py +++ b/benchmarks/dynamo/cachebench.py @@ -23,7 +23,6 @@ TORCHBENCH_MODELS: list[str] = [ "resnet50", "moco", "llama", - "hf_T5", ] HUGGINGFACE_MODELS: list[str] = [ "AllenaiLongformerBase", diff --git a/benchmarks/dynamo/check_accuracy.py b/benchmarks/dynamo/check_accuracy.py index 4c2cecda1552..84c549a03aad 100644 --- a/benchmarks/dynamo/check_accuracy.py +++ b/benchmarks/dynamo/check_accuracy.py @@ -11,7 +11,6 @@ import pandas as pd flaky_models = { "yolov3", "detectron2_maskrcnn_r_101_c4", - "timm_efficientnet", # see https://github.com/pytorch/pytorch/issues/148699 "XGLMForCausalLM", # discovered in https://github.com/pytorch/pytorch/pull/128148 "moondream", # discovered in https://github.com/pytorch/pytorch/pull/159291 # discovered in https://github.com/pytorch/pytorch/issues/161419. Its not flaky but really hard to repro, so skipping it @@ -40,13 +39,9 @@ def check_accuracy(actual_csv, expected_csv, expected_filename): "detectron2_fcos_r_50_fpn", "doctr_det_predictor", "doctr_reco_predictor", - "hf_BigBird", - "hf_Longformer", - "hf_Reformer", - "hf_Roberta_base", - "hf_T5", - "hf_T5_base", - "hf_T5_generate", + "dpn107", + "fbnetv3_b", + "levit_128", "llava", "microbench_unbacked_tolist_sum", "mnasnet1_0", @@ -63,12 +58,7 @@ def check_accuracy(actual_csv, expected_csv, expected_filename): "squeezenet1_1", "stable_diffusion_text_encoder", "stable_diffusion_unet", - "timm_efficientdet", - "timm_efficientnet", - "timm_nfnet", - "timm_regnet", - "timm_resnest", - "timm_vovnet", + "swsl_resnext101_32x16d", "torchrec_dlrm", "vgg16", # LLM diff --git a/benchmarks/dynamo/check_graph_breaks.py b/benchmarks/dynamo/check_graph_breaks.py index 7042962c765a..963f370a1ae1 100644 --- a/benchmarks/dynamo/check_graph_breaks.py +++ b/benchmarks/dynamo/check_graph_breaks.py @@ -36,12 +36,7 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename): "detectron2_fcos_r_50_fpn", "doctr_det_predictor", "doctr_reco_predictor", - "hf_BigBird", - "hf_Longformer", - "hf_Reformer", - "hf_Roberta_base", - "hf_T5", - "hf_T5_base", + "levit_128", "llava", "microbench_unbacked_tolist_sum", "resnet50", @@ -51,7 +46,6 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename): "stable_diffusion_text_encoder", "stable_diffusion_unet", "timm_efficientdet", - "timm_nfnet", "torchrec_dlrm", "vgg16", # LLM diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv index ee9a860842e0..b1cdff124841 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv @@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_training.csv index ff63e3c114e1..1c8e67120c3c 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_training.csv @@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,6 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,20 - - - -hf_Roberta_base,pass,6 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -250,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,fail_accuracy,7 - - - -timm_regnet,pass,7 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv index 719b1d120131..b52e4eb905d1 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_torchbench_inference.csv @@ -118,62 +118,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,fail_accuracy,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -314,30 +258,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_amp_freezing_torchbench_inference.csv index 976a1ef5004e..6dffdd255b7e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_amp_freezing_torchbench_inference.csv @@ -114,58 +114,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -278,38 +226,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_freezing_torchbench_inference.csv index 976a1ef5004e..6dffdd255b7e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_aot_inductor_freezing_torchbench_inference.csv @@ -114,58 +114,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -278,38 +226,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv index ce307d83e291..79f5cd2d1f1f 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv @@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,27 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv index 3d9ae6195544..f9f970a7fc83 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv @@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,27 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv index 7749daf998f6..4c1319db30c8 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv @@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,27 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv index f5c52f598c30..f21ff8d1d268 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv @@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_training.csv index 5a3336efc9a7..01c9a61ddb28 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_training.csv @@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,6 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,20 - - - -hf_Roberta_base,pass,6 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -246,30 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,7 - - - -timm_regnet,pass,7 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_amp_freezing_torchbench_inference.csv index 9125efc4d8a3..775c4d3d1076 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_amp_freezing_torchbench_inference.csv @@ -98,58 +98,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -262,38 +210,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_freezing_torchbench_inference.csv index 9125efc4d8a3..775c4d3d1076 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_aot_inductor_freezing_torchbench_inference.csv @@ -98,58 +98,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -262,38 +210,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv index 80b5cb2cc57d..4e4cc7dc18bc 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv @@ -106,66 +106,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,27 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -286,38 +226,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv index 22db744469da..98e5cd2647f3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv @@ -122,66 +122,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,25 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,8 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -302,38 +242,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,model_fail_to_load,0 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,3 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv index 6ee2d1376f48..f21ff8d1d268 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv @@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,fail_accuracy,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_training.csv index be03268cad05..01c9a61ddb28 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_training.csv @@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,6 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,20 - - - -hf_Roberta_base,pass,6 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -246,30 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,fail_accuracy,7 - - - -timm_regnet,pass,7 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv index ee9a860842e0..b1cdff124841 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv @@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_training.csv index bbf43360b8a4..1c8e67120c3c 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_training.csv @@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,6 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,20 - - - -hf_Roberta_base,pass,6 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -250,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,7 - - - -timm_regnet,pass,7 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv index 510f3886fa4d..b1cdff124841 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv @@ -130,70 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,fail_accuracy,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -342,30 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv index ff63e3c114e1..1c8e67120c3c 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv @@ -78,62 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,6 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,20 - - - -hf_Roberta_base,pass,6 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -250,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,fail_accuracy,7 - - - -timm_regnet,pass,7 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv index 5fac63df5e97..0dfe73870e46 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv @@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,9 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,8 - - - -hf_Roberta_base,pass,0 - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -345,38 +278,6 @@ stable_diffusion_unet,model_fail_to_load,0 -timm_efficientdet,pass,2 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_training.csv index f0f81c34e1d7..b1c7485b059e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_training.csv @@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,6 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,25 - - - -hf_Roberta_base,pass,6 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -258,38 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,pass,2 - - - -timm_efficientnet,pass,7 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_torchbench_inference.csv index d3ee40f5e5ca..05f9596f620a 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_torchbench_inference.csv @@ -118,62 +118,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,fail_accuracy,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -314,34 +258,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv index 90eeff1ef4d4..1ba446efc363 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv @@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,9 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,8 - - - -hf_Roberta_base,pass,0 - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -345,38 +278,6 @@ stable_diffusion_unet,model_fail_to_load,0 -timm_efficientdet,pass,2 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_training.csv index a9a9f8de7040..52173c72c2df 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_training.csv @@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,fail_to_run,3 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,25 - - - -hf_Roberta_base,pass,6 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -254,38 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,pass,2 - - - -timm_efficientnet,pass,7 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv index 7756a1e397ce..35cbd90aa70f 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv @@ -130,74 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,fail_to_run,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,5 - - - -hf_Roberta_base,pass,0 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -346,38 +278,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,pass,2 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_training.csv index aa5f8af11f93..3b5a380483b8 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_training.csv @@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,fail_to_run,3 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,10 - - - -hf_Reformer,pass,20 - - - -hf_Roberta_base,pass,6 - - - -hf_T5,pass,5 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -254,38 +190,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,pass,8 - - - -timm_efficientnet,pass,7 - - - -timm_nfnet,pass,6 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv index 5fac63df5e97..0dfe73870e46 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv @@ -130,73 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,pass,9 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,8 - - - -hf_Roberta_base,pass,0 - - -hf_T5,pass,0 - - - -hf_T5_base,pass,0 - - - -hf_T5_generate,pass,7 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -345,38 +278,6 @@ stable_diffusion_unet,model_fail_to_load,0 -timm_efficientdet,pass,2 - - - -timm_efficientnet,pass,0 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_training.csv index d48198408130..b91dea5f6105 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_training.csv @@ -78,70 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,15 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Longformer,pass,4 - - - -hf_Reformer,pass,25 - - - -hf_Roberta_base,pass,6 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -258,38 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientdet,pass,2 - - - -timm_efficientnet,pass,7 - - - -timm_nfnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv index 7377ab719b4a..b508d8bd6e30 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv @@ -130,66 +130,6 @@ functorch_maml_omniglot,pass,0 -hf_Albert,pass,0 - - - -hf_Bart,pass,0 - - - -hf_Bert,pass,0 - - - -hf_Bert_large,pass,0 - - - -hf_BigBird,fail_accuracy,0 - - - -hf_DistilBert,pass,0 - - - -hf_GPT2,pass,0 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,8 - - - -hf_T5,pass,0 - - - -hf_T5_base,eager_fail_to_run,0 - - - -hf_T5_generate,pass,11 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,0 - - - -hf_distil_whisper,pass,0 - - - lennard_jones,pass,0 @@ -334,30 +274,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,0 - - - -timm_regnet,pass,0 - - - -timm_resnest,pass,0 - - - -timm_vision_transformer,pass,0 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,0 - - - torch_multimodal_clip,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_training.csv index fc08f9635754..91e6df19ff02 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_training.csv @@ -78,58 +78,6 @@ functorch_maml_omniglot,pass,7 -hf_Albert,pass,6 - - - -hf_Bart,pass,6 - - - -hf_Bert,pass,6 - - - -hf_Bert_large,pass,6 - - - -hf_BigBird,pass,6 - - - -hf_DistilBert,pass,6 - - - -hf_GPT2,pass,8 - - - -hf_GPT2_large,pass_due_to_skip,0 - - - -hf_Reformer,pass,25 - - - -hf_T5_base,eager_2nd_run_OOM,0 - - - -hf_T5_large,pass_due_to_skip,0 - - - -hf_Whisper,pass,6 - - - -hf_distil_whisper,model_fail_to_load,0 - - - lennard_jones,pass,7 @@ -246,30 +194,6 @@ stable_diffusion_unet,pass_due_to_skip,0 -timm_efficientnet,pass,7 - - - -timm_regnet,pass,7 - - - -timm_resnest,pass,6 - - - -timm_vision_transformer,pass,6 - - - -timm_vision_transformer_large,pass_due_to_skip,0 - - - -timm_vovnet,pass,6 - - - torch_multimodal_clip,pass,7 diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py index 8a4a1830e601..feb5f97c2dc7 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -149,7 +149,6 @@ CI_SKIP_DYNAMIC_BATCH_ONLY = { "detectron2_fasterrcnn_r_50_c4", "detectron2_fasterrcnn_r_50_dc5", "detectron2_fasterrcnn_r_50_fpn", - "hf_T5_generate", "Reformer", "llama", }.union(INTERNAL_CI_SKIP_DYNAMIC_BATCH_ONLY) @@ -176,13 +175,7 @@ BENCHMARK_USE_SGD = { "speech_transformer", "squeezenet1_1", "stable_diffusion_text_encoder", - "timm_efficientdet", - "timm_nfnet", - "timm_resnest", - "timm_vision_transformer", - "timm_vovnet", "vgg16", - "hf_T5", # Fails dynamic https://github.com/pytorch/pytorch/issues/115968 # HF "AlbertForMaskedLM", "BartForCausalLM", @@ -216,8 +209,6 @@ CI_USE_SGD = { "detectron2_maskrcnn_r_101_fpn", "detectron2_maskrcnn_r_50_c4", "detectron2_maskrcnn_r_50_fpn", - "hf_T5_base", - "hf_clip", "llama_v2_7b_16h", "mobilenet_v2_quantized_qat", "phi_1_5 resnet50_quantized_qat", @@ -2031,8 +2022,6 @@ class BenchmarkRunner: from diffusers.models.transformer_2d import Transformer2DModel from torchbenchmark.models.nanogpt.model import Block from transformers.models.llama.modeling_llama import LlamaDecoderLayer - from transformers.models.t5.modeling_t5 import T5Block - from transformers.models.whisper.modeling_whisper import WhisperEncoderLayer from torch.distributed.fsdp.wrap import ( ModuleWrapPolicy, @@ -2042,10 +2031,6 @@ class BenchmarkRunner: # handcrafted wrap policy MODEL_FSDP_WRAP = { "stable_diffusion_unet": (Transformer2DModel,), - "hf_T5": (T5Block,), - "hf_T5_base": (T5Block,), - "hf_T5_large": (T5Block,), - "hf_Whisper": (WhisperEncoderLayer,), "llama_v2_7b_16h": (LlamaDecoderLayer,), "nanogpt": (Block,), } @@ -3810,22 +3795,6 @@ def run(runner, args, original_dir=None): global synchronize synchronize = torch.cuda.synchronize if HAS_CUDA else torch.xpu.synchronize - if ( - args.devices == ["cuda"] - and torch.cuda.get_device_properties(0).total_memory < 25 * 2**30 - ): - # OOM errors on an RTX 3090 with 24gb RAM - runner.skip_models.update( - { - # torchbench - "hf_Longformer", - "timm_nfnet", - "timm_efficientdet", - } - ) - if args.training: - runner.skip_models.add("hf_T5") - if args.nnc: torch._C._jit_override_can_fuse_on_cpu(True) torch._C._jit_override_can_fuse_on_gpu(True) diff --git a/benchmarks/dynamo/dist_util.py b/benchmarks/dynamo/dist_util.py index 2994c0681c77..14b9de188ec4 100644 --- a/benchmarks/dynamo/dist_util.py +++ b/benchmarks/dynamo/dist_util.py @@ -21,9 +21,6 @@ try: except ImportError: from torchbench import setup_torchbench_cwd -from transformers.models.bert.modeling_bert import BertLayer, BertLMPredictionHead -from transformers.models.t5.modeling_t5 import T5Block - def setup(rank, world_size): os.environ["MASTER_ADDR"] = os.getenv("MASTER_ADDR", "localhost") @@ -128,8 +125,6 @@ def fsdp_checkpointing_base(model, blocks): MODEL_FSDP_WRAP = { "toy_model": (MyModule,), - "hf_Bert": (BertLayer, BertLMPredictionHead), - "hf_T5": (T5Block,), } diff --git a/benchmarks/dynamo/distributed.py b/benchmarks/dynamo/distributed.py index d2af630c8bfb..2b6ed0721ac1 100644 --- a/benchmarks/dynamo/distributed.py +++ b/benchmarks/dynamo/distributed.py @@ -158,7 +158,7 @@ if __name__ == "__main__": model_arg.add_argument( "--torchbench-model", "--torchbench_model", - help="name of torchbench model, e.g. hf_Bert", + help="name of torchbench model, e.g. BERT_pytorch", ) model_arg.add_argument( "--toy-model", "--toy_model", action="store_true", help="use toy model instead" diff --git a/benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv b/benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv index d10d41f10f33..ae5b7e6d82cb 100644 --- a/benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv +++ b/benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv @@ -12,17 +12,6 @@ cuda,dlrm,1024,1.3421,3.2177,4.9493,1.0009 cuda,drq,1,1.0820,3.8157,8.0732,0.9687 cuda,fastNLP_Bert,6,1.4839,37.9050,32.7583,1.1563 cuda,functorch_dp_cifar10,64,1.5014,6.9596,14.1516,0.4432 -cuda,hf_Albert,8,2.2452,30.6134,25.9036,1.3098 -cuda,hf_Bart,4,1.7012,34.3999,37.9975,1.0128 -cuda,hf_Bert,4,1.9003,23.3435,34.8196,1.0273 -cuda,hf_Bert_large,4,1.6346,52.8525,62.3112,1.0726 -cuda,hf_BigBird,2,1.9208,105.2672,101.4787,1.1415 -cuda,hf_DistilBert,8,1.3988,22.5793,20.2386,1.0232 -cuda,hf_GPT2,4,1.8075,27.5184,25.3428,1.1562 -cuda,hf_GPT2_large,4,1.7716,118.7404,68.1618,1.1725 -cuda,hf_Reformer,4,1.1744,70.4228,15.1152,0.9266 -cuda,hf_T5,8,1.8778,93.3134,37.0046,1.2279 -cuda,hf_T5_large,2,2.3623,101.5518,143.7982,1.1674 cuda,lennard_jones,1000,1.0649,1.5233,4.1119,0.9998 cuda,mnasnet1_0,32,1.1957,19.1993,27.2302,0.7758 cuda,mobilenet_v2,96,1.4876,32.3311,27.4719,1.1729 @@ -42,14 +31,6 @@ cuda,shufflenet_v2_x1_0,128,1.3027,25.7017,27.9875,1.1015 cuda,soft_actor_critic,256,0.9965,2.2580,4.6661,0.9995 cuda,speech_transformer,32,1.8405,35.1645,33.3422,1.0888 cuda,squeezenet1_1,32,1.4191,7.3454,9.4751,1.1148 -cuda,timm_efficientdet,1,1.6630,78.2697,150.9620,0.9904 -cuda,timm_efficientnet,32,1.2689,28.5348,66.3911,0.9428 -cuda,timm_nfnet,128,1.5319,79.5429,32.9961,1.1070 -cuda,timm_regnet,32,1.0564,56.9897,53.0027,0.9500 -cuda,timm_resnest,32,1.6485,14.3908,56.7240,0.9515 -cuda,timm_vision_transformer,8,1.6100,18.7736,36.9495,0.7301 -cuda,timm_vision_transformer_large,8,1.0842,170.9849,72.0604,0.9762 -cuda,timm_vovnet,32,1.0472,25.4676,24.8428,0.8843 cuda,tts_angular,64,1.0366,6.9889,4.2683,0.9973 cuda,vgg16,64,1.2560,52.7072,7.3733,0.9884 cuda,yolov3,16,1.2600,54.2350,42.4711,1.0108 diff --git a/benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv b/benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv index 2462b4cd752d..80339a7ae303 100644 --- a/benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv +++ b/benchmarks/dynamo/expected_ci_speedup_inductor_torchbench_cpu.csv @@ -1,29 +1,16 @@ #name,backend,data_type,shape,wrapper,perf_speedup_target_c7i_metal_24xl -#timm_vision_transformer,inductor,float32,static,default,1.039510755 phlippe_densenet,inductor,float32,static,default,1.46474287 basic_gnn_edgecnn,inductor,float32,dynamic,default,1.30092957 llama_v2_7b_16h,inductor,float32,dynamic,default,1.23234331 resnet50,inductor,float32,dynamic,default,1.67742767 -#timm_efficientnet,inductor,float32,static,cpp, mobilenet_v3_large,inductor,float32,static,cpp,2.63311706 -timm_resnest,inductor,float32,dynamic,cpp,1.7321529 functorch_maml_omniglot,inductor,float32,dynamic,cpp,1.126799 -#hf_GPT2,inductor,float32,dynamic,cpp, yolov3,export-aot-inductor,float32,static,default,1.40687424 mobilenet_v2,export-aot-inductor,float32,static,default,2.90375357 resnext50_32x4d,export-aot-inductor,float32,dynamic,default,1.49299689 -hf_Albert,export-aot-inductor,float32,dynamic,default,1.261471 resnext50_32x4d,inductor,amp,static,default,1.47023111 vgg16,inductor,amp,static,default,1.2692454 -hf_Longformer,inductor,amp,dynamic,default,1.22015225 -hf_Bert_large,inductor,amp,dynamic,default,1.18572179 llama,inductor,amp,static,default,1.33157028 -timm_regnet,inductor,amp,static,cpp,1.12734073 mnasnet1_0,inductor,amp,static,cpp,2.1296814 -#hf_T5_generate,inductor,amp,dynamic,cpp, -timm_vovnet,inductor,amp,dynamic,cpp,1.10851009 #mobilenet_v2,inductor,amp,dynamic,cpp,2.27774577 # https://github.com/pytorch/pytorch/issues/131693 -hf_GPT2,export-aot-inductor,amp,static,default,1.4432794 densenet121,export-aot-inductor,amp,static,default,1.25591385 -hf_DistilBert,export-aot-inductor,amp,dynamic,default,1.2926442 -hf_Bart,export-aot-inductor,amp,dynamic,default,1.19515416 diff --git a/benchmarks/dynamo/torchbench.py b/benchmarks/dynamo/torchbench.py index 1f10ecc661d8..da6a3e1336aa 100755 --- a/benchmarks/dynamo/torchbench.py +++ b/benchmarks/dynamo/torchbench.py @@ -75,29 +75,7 @@ def setup_torchbench_cwd(): return original_dir -def process_hf_reformer_output(out): - assert isinstance(out, list) - # second output is unstable - return [elem for i, elem in enumerate(out) if i != 1] - - -def process_hf_whisper_output(out): - out_ret = [] - for i, elem in enumerate(out): - if i == 0: - if elem is not None: - assert isinstance(elem, dict) - out_ret.append({k: v for k, v in elem.items() if k != "logits"}) - elif i != 1: - out_ret.append(elem) - - return out_ret - - -process_train_model_output = { - "hf_Reformer": process_hf_reformer_output, - "hf_Whisper": process_hf_whisper_output, -} +process_train_model_output = {} class TorchBenchmarkRunner(BenchmarkRunner): @@ -227,12 +205,10 @@ class TorchBenchmarkRunner(BenchmarkRunner): "drq", "hf_Reformer", "DALLE2_pytorch", - "hf_BigBird", "detectron2_maskrcnn_r_50_fpn", "detectron2_maskrcnn_r_101_fpn", "vision_maskrcnn", "doctr_reco_predictor", - "hf_T5_generate", } def load_model( @@ -395,8 +371,6 @@ class TorchBenchmarkRunner(BenchmarkRunner): and hasattr(model.config, "use_cache") ): model.config.use_cache = False - if model_name == "hf_T5_generate": - model.model.config.use_cache = False self.validate_model(model, example_inputs) return device, benchmark.name, model, example_inputs, batch_size diff --git a/benchmarks/dynamo/torchbench.yaml b/benchmarks/dynamo/torchbench.yaml index bf0a1b6c31e8..7d7817936e56 100644 --- a/benchmarks/dynamo/torchbench.yaml +++ b/benchmarks/dynamo/torchbench.yaml @@ -5,8 +5,6 @@ batch_size: demucs: 4 dlrm: 1024 densenet121: 4 - hf_Reformer: 4 - hf_T5_base: 4 timm_efficientdet: 1 llama_v2_7b_16h: 1 # reduced from 16 due to cudagraphs OOM in TorchInductor dashboard @@ -30,7 +28,6 @@ tolerance: - alexnet - attention_is_all_you_need_pytorch - densenet121 - - hf_Albert - vgg16 - mobilenet_v3_large - nvidia_deeprecommender @@ -40,20 +37,16 @@ tolerance: - soft_actor_critic - tacotron2 - yolov3 - - timm_efficientdet - - timm_efficientnet - squeezenet1_1 higher_fp16: - doctr_reco_predictor - drq - - hf_Whisper - phlippe_resnet higher_bf16: - doctr_reco_predictor - drq - - hf_Whisper # These models need higher tolerance for xpu devices with bf16 higher_bf16_xpu: @@ -71,16 +64,9 @@ tolerance: require_larger_multiplier_for_smaller_tensor: - yolov3 - - timm_efficientnet # These benchmarks took >600s on an i9-11900K CPU very_slow: &VERY_SLOW_MODELS - # 3339s - - hf_BigBird - # 3062s - - hf_Longformer - # 930s - - hf_T5 # These benchmarks took >60s on an i9-11900K CPU @@ -92,18 +78,6 @@ slow: - demucs # 242s - fastNLP_Bert - # 221s - - hf_Albert - # 400s - - hf_Bart - # 334s - - hf_Bert - # 187s - - hf_DistilBert - # 470s - - hf_GPT2 - # 141s - - hf_Reformer # 317s - speech_transformer # 99s @@ -187,11 +161,36 @@ skip: - hf_clip # multi gpu not always available in benchmark runners - simple_gpt_tp_manual + # skip hf and timm models in torchbench since + # there are already separate benchmarks for them + - hf_Albert + - hf_Bart + - hf_Bert + - hf_BigBird + - hf_DistilBert + - hf_GPT2 + - hf_Longformer + - hf_Reformer + - hf_T5 + - timm_efficientdet + - timm_efficientnet + - timm_nfnet + - timm_regnet + - timm_resnest + - timm_vision_transformer + - timm_vovnet + - hf_Bert_large + - hf_GPT2_large + - hf_Roberta_base + - hf_T5_base + - hf_T5_generate + - hf_T5_large + - hf_Whisper + - hf_distil_whisper + - timm_vision_transformer_large device: cpu: - # OOMs - - hf_T5_generate # model is CUDA only - cm3leon_generate # timeout @@ -208,16 +207,12 @@ skip: - torchrec_dlrm - simple_gpt # works on cuda, accuracy failure on cpu - - hf_Whisper - stable_diffusion_text_encoder - llava - moco # Skip these additional models when running on aarch64 - cpu_aarch64: - # timeout on aarch64 - - timm_regnet - - timm_nfnet + cpu_aarch64: [] cuda: [] @@ -235,7 +230,6 @@ skip: - sam_fast # Model's DEFAULT_TRAIN_BSIZE is not implemented - cm3leon_generate - - hf_T5_generate - doctr_det_predictor - doctr_reco_predictor - moondream @@ -247,9 +241,6 @@ skip: - cm3leon_generate - detectron2_fcos_r_50_fpn - fastNLP_Bert - - hf_Longformer - - hf_Reformer - - hf_T5_generate - opacus_cifar10 - speech_transformer @@ -286,9 +277,6 @@ accuracy: # Models too large to have eager, dynamo and fp64_numbers simultaneosuly # even for 40 GB machine. We have tested accuracy for smaller version of # these models - - hf_GPT2_large - - hf_T5_large - - timm_vision_transformer_large # accuracy https://github.com/pytorch/pytorch/issues/93847 - maml - llama_v2_7b_16h @@ -300,5 +288,4 @@ accuracy: - pytorch_unet max_batch_size: - hf_GPT2: 2 pytorch_unet: 2 diff --git a/benchmarks/dynamo/torchbench_models_list.txt b/benchmarks/dynamo/torchbench_models_list.txt index 04947c4a6a30..f8f36810c693 100644 --- a/benchmarks/dynamo/torchbench_models_list.txt +++ b/benchmarks/dynamo/torchbench_models_list.txt @@ -4,11 +4,6 @@ LearningToPaint,1024 alexnet,1024 dcgan,1024 densenet121,64 -hf_Albert,32 -hf_Bart,16 -hf_Bert,16 -hf_GPT2,16 -hf_T5,4 mnasnet1_0,256 mobilenet_v2,128 mobilenet_v3_large,256 @@ -19,10 +14,4 @@ resnet50,128 resnext50_32x4d,128 shufflenet_v2_x1_0,512 squeezenet1_1,512 -timm_nfnet,256 -timm_efficientnet,128 -timm_regnet,128 -timm_resnest,256 -timm_vision_transformer,256 -timm_vovnet,128 vgg16,128 diff --git a/benchmarks/dynamo/torchbench_models_list_cpu.txt b/benchmarks/dynamo/torchbench_models_list_cpu.txt index ab485702b838..af2293b5a4a6 100644 --- a/benchmarks/dynamo/torchbench_models_list_cpu.txt +++ b/benchmarks/dynamo/torchbench_models_list_cpu.txt @@ -6,18 +6,6 @@ densenet121,512 dlrm,2048 fastNLP_Bert,8 functorch_dp_cifar10,1024 -hf_Albert,8 -hf_Bart,8 -hf_Bert,8 -hf_Bert_large,8 -hf_DistilBert,8 -hf_GPT2,8 -hf_GPT2_large,1 -hf_Longformer,4 -hf_Reformer,8 -hf_T5,4 -hf_T5_base,1 -hf_T5_large,1 LearningToPaint,96 lennard_jones,1024 mnasnet1_0,32 @@ -35,13 +23,6 @@ shufflenet_v2_x1_0,64 speech_transformer,1024 squeezenet1_1,16 Super_SloMo,1024 -timm_efficientnet,64 -timm_nfnet,128 -timm_regnet,32 -timm_resnest,32 -timm_vision_transformer,16 -timm_vision_transformer_large,8 -timm_vovnet,32 tts_angular,1024 vgg16,64 vision_maskrcnn,1