mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[ROCm][CI] additional dynamo benchmarks for inductor-periodic (#164279)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/164279
Approved by: https://github.com/jeffdaily
Co-authored-by: Jeff Daily <jeff.daily@amd.com>
This commit is contained in:
committed by
PyTorch MergeBot
parent
7d570129e0
commit
412c6d28ec
10
.github/workflows/inductor-periodic.yml
vendored
10
.github/workflows/inductor-periodic.yml
vendored
@ -106,6 +106,16 @@ jobs:
|
||||
{ config: "dynamic_aot_eager_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "dynamic_aot_eager_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "dynamic_aot_eager_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
{ config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.rocm.gpu.gfx942.1" },
|
||||
]}
|
||||
secrets: inherit
|
||||
|
||||
|
@ -34,19 +34,24 @@ def check_accuracy(actual_csv, expected_csv, expected_filename):
|
||||
if "rocm" in expected_filename:
|
||||
flaky_models.update(
|
||||
{
|
||||
"Background_Matting",
|
||||
"alexnet",
|
||||
"cait_m36_384",
|
||||
"dla102",
|
||||
"demucs",
|
||||
"densenet121",
|
||||
"detectron2_fcos_r_50_fpn",
|
||||
"doctr_det_predictor",
|
||||
"doctr_reco_predictor",
|
||||
"dpn107",
|
||||
"fbnetv3_b",
|
||||
"hf_BigBird",
|
||||
"hf_Longformer",
|
||||
"hf_Reformer",
|
||||
"hf_Roberta_base",
|
||||
"hf_T5",
|
||||
"hf_T5_base",
|
||||
"hf_T5_generate",
|
||||
"levit_128",
|
||||
"llava",
|
||||
"microbench_unbacked_tolist_sum",
|
||||
@ -64,6 +69,7 @@ def check_accuracy(actual_csv, expected_csv, expected_filename):
|
||||
"squeezenet1_1",
|
||||
"stable_diffusion_text_encoder",
|
||||
"stable_diffusion_unet",
|
||||
"swsl_resnext101_32x16d",
|
||||
"timm_efficientdet",
|
||||
"timm_efficientnet",
|
||||
"timm_nfnet",
|
||||
|
@ -47,6 +47,8 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename):
|
||||
"levit_128",
|
||||
"llava",
|
||||
"microbench_unbacked_tolist_sum",
|
||||
"resnet50",
|
||||
"resnet152",
|
||||
"sam",
|
||||
"sam_fast",
|
||||
"stable_diffusion_text_encoder",
|
||||
|
@ -46,7 +46,7 @@ deit_base_distilled_patch16_224,pass,7
|
||||
|
||||
|
||||
|
||||
dla102,pass,7
|
||||
dla102,pass,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -170,7 +170,7 @@ mobilenet_v2_quantized_qat,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
mobilenet_v3_large,pass,7
|
||||
mobilenet_v3_large,pass,0
|
||||
|
||||
|
||||
|
||||
@ -210,7 +210,7 @@ pytorch_unet,pass_due_to_skip,7
|
||||
|
||||
|
||||
|
||||
resnet152,pass,7
|
||||
resnet152,pass,0
|
||||
|
||||
|
||||
|
||||
@ -218,7 +218,7 @@ resnet18,pass,6
|
||||
|
||||
|
||||
|
||||
resnet50,pass,6
|
||||
resnet50,pass,0
|
||||
|
||||
|
||||
|
||||
@ -270,7 +270,7 @@ timm_nfnet,pass,0
|
||||
|
||||
|
||||
|
||||
timm_regnet,pass,7
|
||||
timm_regnet,pass,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ DistilBertForQuestionAnswering,pass,0
|
||||
|
||||
|
||||
|
||||
DistillGPT2,pass,2
|
||||
DistillGPT2,pass,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -150,6 +150,10 @@ hf_GPT2_large,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
hf_Roberta_base,pass,0
|
||||
|
||||
|
||||
|
||||
hf_T5,pass,0
|
||||
|
||||
|
||||
@ -194,6 +198,10 @@ maml_omniglot,pass,0
|
||||
|
||||
|
||||
|
||||
microbench_unbacked_tolist_sum,fail_to_run,0
|
||||
|
||||
|
||||
|
||||
mnasnet1_0,pass,0
|
||||
|
||||
|
||||
@ -310,6 +318,10 @@ timm_efficientnet,pass,0
|
||||
|
||||
|
||||
|
||||
timm_nfnet,pass,0
|
||||
|
||||
|
||||
|
||||
timm_regnet,pass,0
|
||||
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ deit_base_distilled_patch16_224,pass,7
|
||||
|
||||
|
||||
|
||||
dla102,pass,7
|
||||
dla102,pass,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -170,7 +170,7 @@ mobilenet_v2_quantized_qat,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
mobilenet_v3_large,pass,7
|
||||
mobilenet_v3_large,pass,0
|
||||
|
||||
|
||||
|
||||
@ -210,7 +210,7 @@ pytorch_unet,pass_due_to_skip,7
|
||||
|
||||
|
||||
|
||||
resnet152,pass,7
|
||||
resnet152,pass,0
|
||||
|
||||
|
||||
|
||||
@ -266,7 +266,7 @@ timm_nfnet,pass,0
|
||||
|
||||
|
||||
|
||||
timm_regnet,pass,7
|
||||
timm_regnet,pass,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -30,7 +30,7 @@ BertForQuestionAnswering,pass,5
|
||||
|
||||
|
||||
|
||||
BlenderbotForCausalLM,eager_fail_to_run,0
|
||||
BlenderbotForCausalLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
@ -50,7 +50,7 @@ DebertaV2ForMaskedLM,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
DebertaV2ForQuestionAnswering,eager_1st_run_OOM,0
|
||||
DebertaV2ForQuestionAnswering,pass,4
|
||||
|
||||
|
||||
|
||||
|
|
@ -150,7 +150,7 @@ pit_b_224,pass,0
|
||||
|
||||
|
||||
|
||||
pnasnet5large,pass,0
|
||||
pnasnet5large,fail_accuracy,0
|
||||
|
||||
|
||||
|
||||
@ -158,23 +158,23 @@ poolformer_m36,pass,0
|
||||
|
||||
|
||||
|
||||
regnety_002,pass,0
|
||||
regnety_002,fail_accuracy,0
|
||||
|
||||
|
||||
|
||||
repvgg_a2,pass,0
|
||||
repvgg_a2,fail_accuracy,0
|
||||
|
||||
|
||||
|
||||
res2net101_26w_4s,pass,0
|
||||
res2net101_26w_4s,fail_accuracy,0
|
||||
|
||||
|
||||
|
||||
res2net50_14w_8s,pass,0
|
||||
res2net50_14w_8s,fail_accuracy,0
|
||||
|
||||
|
||||
|
||||
res2next50,pass,0
|
||||
res2next50,fail_accuracy,0
|
||||
|
||||
|
||||
|
||||
@ -206,7 +206,7 @@ swin_base_patch4_window7_224,pass,0
|
||||
|
||||
|
||||
|
||||
swsl_resnext101_32x16d,pass,0
|
||||
swsl_resnext101_32x16d,fail_accuracy,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -34,7 +34,7 @@ convnext_base,pass,7
|
||||
|
||||
|
||||
|
||||
crossvit_9_240,pass,7
|
||||
crossvit_9_240,fail_accuracy,7
|
||||
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@ deit_base_distilled_patch16_224,pass,7
|
||||
|
||||
|
||||
|
||||
dla102,pass,7
|
||||
dla102,pass,0
|
||||
|
||||
|
||||
|
||||
@ -62,7 +62,7 @@ eca_botnext26ts_256,pass,7
|
||||
|
||||
|
||||
|
||||
eca_halonext26ts,pass,7
|
||||
eca_halonext26ts,fail_accuracy,7
|
||||
|
||||
|
||||
|
||||
@ -74,7 +74,7 @@ fbnetc_100,pass,7
|
||||
|
||||
|
||||
|
||||
fbnetv3_b,pass,6
|
||||
fbnetv3_b,fail_accuracy,6
|
||||
|
||||
|
||||
|
||||
@ -130,7 +130,7 @@ mnasnet_100,pass,7
|
||||
|
||||
|
||||
|
||||
mobilenetv2_100,pass,7
|
||||
mobilenetv2_100,fail_accuracy,7
|
||||
|
||||
|
||||
|
||||
@ -150,7 +150,7 @@ pit_b_224,pass,6
|
||||
|
||||
|
||||
|
||||
pnasnet5large,pass,5
|
||||
pnasnet5large,fail_accuracy,5
|
||||
|
||||
|
||||
|
||||
@ -162,7 +162,7 @@ regnety_002,pass,6
|
||||
|
||||
|
||||
|
||||
repvgg_a2,pass,7
|
||||
repvgg_a2,fail_accuracy,7
|
||||
|
||||
|
||||
|
||||
@ -186,7 +186,7 @@ resnest101e,pass,6
|
||||
|
||||
|
||||
|
||||
rexnet_100,pass,7
|
||||
rexnet_100,fail_accuracy,7
|
||||
|
||||
|
||||
|
||||
@ -230,7 +230,7 @@ twins_pcpvt_base,pass,7
|
||||
|
||||
|
||||
|
||||
visformer_small,pass,7
|
||||
visformer_small,fail_accuracy,7
|
||||
|
||||
|
||||
|
||||
|
|
@ -162,7 +162,15 @@ hf_GPT2_large,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
hf_Reformer,pass,8
|
||||
hf_Longformer,pass,4
|
||||
|
||||
|
||||
|
||||
hf_Reformer,pass,5
|
||||
|
||||
|
||||
|
||||
hf_Roberta_base,pass,0
|
||||
|
||||
|
||||
|
||||
@ -174,7 +182,7 @@ hf_T5_base,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
hf_T5_generate,pass,11
|
||||
hf_T5_generate,pass,7
|
||||
|
||||
|
||||
|
||||
@ -214,6 +222,10 @@ maml_omniglot,pass,0
|
||||
|
||||
|
||||
|
||||
microbench_unbacked_tolist_sum,pass,2
|
||||
|
||||
|
||||
|
||||
mnasnet1_0,pass,0
|
||||
|
||||
|
||||
@ -306,6 +318,10 @@ sam,pass,0
|
||||
|
||||
|
||||
|
||||
sam_fast,model_fail_to_load,0
|
||||
|
||||
|
||||
|
||||
shufflenet_v2_x1_0,pass,0
|
||||
|
||||
|
||||
@ -330,10 +346,18 @@ stable_diffusion_unet,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
timm_efficientdet,pass,2
|
||||
|
||||
|
||||
|
||||
timm_efficientnet,pass,0
|
||||
|
||||
|
||||
|
||||
timm_nfnet,pass,0
|
||||
|
||||
|
||||
|
||||
timm_regnet,pass,0
|
||||
|
||||
|
||||
|
|
@ -70,7 +70,7 @@ fastNLP_Bert,pass,10
|
||||
|
||||
|
||||
|
||||
functorch_dp_cifar10,pass,7
|
||||
functorch_dp_cifar10,fail_accuracy,7
|
||||
|
||||
|
||||
|
||||
@ -110,7 +110,19 @@ hf_GPT2_large,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
hf_Reformer,pass,25
|
||||
hf_Longformer,pass,10
|
||||
|
||||
|
||||
|
||||
hf_Reformer,pass,20
|
||||
|
||||
|
||||
|
||||
hf_Roberta_base,pass,6
|
||||
|
||||
|
||||
|
||||
hf_T5,pass,5
|
||||
|
||||
|
||||
|
||||
@ -158,7 +170,7 @@ mobilenet_v2_quantized_qat,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
mobilenet_v3_large,pass,7
|
||||
mobilenet_v3_large,pass,0
|
||||
|
||||
|
||||
|
||||
@ -198,7 +210,7 @@ pytorch_unet,pass_due_to_skip,7
|
||||
|
||||
|
||||
|
||||
resnet152,pass,7
|
||||
resnet152,pass,0
|
||||
|
||||
|
||||
|
||||
@ -242,11 +254,19 @@ stable_diffusion_unet,pass_due_to_skip,0
|
||||
|
||||
|
||||
|
||||
timm_efficientdet,pass,8
|
||||
|
||||
|
||||
|
||||
timm_efficientnet,pass,7
|
||||
|
||||
|
||||
|
||||
timm_regnet,pass,7
|
||||
timm_nfnet,pass,6
|
||||
|
||||
|
||||
|
||||
timm_regnet,pass,0
|
||||
|
||||
|
||||
|
||||
@ -278,7 +298,7 @@ vgg16,pass,0
|
||||
|
||||
|
||||
|
||||
vision_maskrcnn,pass,39
|
||||
vision_maskrcnn,fail_accuracy,39
|
||||
|
||||
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ deit_base_distilled_patch16_224,pass,7
|
||||
|
||||
|
||||
|
||||
dla102,pass,7
|
||||
dla102,pass,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -170,7 +170,7 @@ mobilenet_v2_quantized_qat,eager_fail_to_run,0
|
||||
|
||||
|
||||
|
||||
mobilenet_v3_large,pass,7
|
||||
mobilenet_v3_large,pass,0
|
||||
|
||||
|
||||
|
||||
@ -210,7 +210,7 @@ pytorch_unet,pass_due_to_skip,7
|
||||
|
||||
|
||||
|
||||
resnet152,pass,7
|
||||
resnet152,pass,0
|
||||
|
||||
|
||||
|
||||
@ -270,7 +270,7 @@ timm_nfnet,pass,0
|
||||
|
||||
|
||||
|
||||
timm_regnet,pass,7
|
||||
timm_regnet,pass,0
|
||||
|
||||
|
||||
|
||||
|
|
@ -2282,7 +2282,9 @@ class BenchmarkRunner:
|
||||
del model_copy
|
||||
empty_gpu_cache(current_device)
|
||||
|
||||
# Two eager runs should have exactly same result
|
||||
# Two eager runs should produce the same result, within tolerance.
|
||||
# TODO: If we want the two eager runs to match exactly, then deterministic mode should be set.
|
||||
# For example, MIOpen convolutions could be implemented with non-deterministic algos.
|
||||
is_same = True
|
||||
try:
|
||||
if (
|
||||
@ -2292,7 +2294,7 @@ class BenchmarkRunner:
|
||||
correct_rerun_result,
|
||||
fp64_ref=None,
|
||||
cos_similarity=False,
|
||||
tol=0,
|
||||
tol=tolerance if torch.version.hip else 0,
|
||||
equal_nan=self.equal_nan,
|
||||
use_larger_multiplier_for_smaller_tensor=self.use_larger_multiplier_for_smaller_tensor(
|
||||
name
|
||||
|
Reference in New Issue
Block a user