Mirror of https://github.com/pytorch/pytorch.git
Prune models from the TorchInductor dashboard to reduce CI cost. This PR prunes the Hugging Face models according to the [doc](https://docs.google.com/document/d/1nLPNNAU-_M9Clx9FMrJ1ycdPxe-xRA54olPnsFzdpoU/edit?tab=t.0), reducing the suite from 46 to 27 models.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164815
Approved by: https://github.com/anijain2305, https://github.com/seemethere, https://github.com/huydhn, https://github.com/malfet
103 lines
2.4 KiB
YAML
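The config below drives the TorchInductor Hugging Face benchmark suite: which models are skipped (globally, per device, or because of control flow) and how the default batch size is divided per model. As a rough illustration only, here is a minimal Python sketch of how such a config could be loaded and consulted; the file name, function names, and default batch size are assumptions, not the actual PyTorch benchmark harness code.

import yaml  # requires PyYAML

def load_config(path="huggingface.yaml"):  # hypothetical file name
    with open(path) as f:
        return yaml.safe_load(f)

def is_skipped(cfg, model, device="cuda"):
    """True if the model is excluded globally or for the given device."""
    skip = cfg.get("skip", {})
    return (model in skip.get("all", [])
            or model in skip.get("device", {}).get(device, []))

def effective_batch_size(cfg, model, default_bs=16):
    """Divide the default batch size by the model's divisor, if one is listed."""
    divisor = cfg.get("batch_size", {}).get("divisors", {}).get(model, 1)
    return max(default_bs // divisor, 1)

if __name__ == "__main__":
    cfg = load_config()
    print(is_skipped(cfg, "google/gemma-3-4b-it"))                 # True: under skip.all
    print(is_skipped(cfg, "openai/whisper-tiny", device="cpu"))    # True: under skip.device.cpu
    print(effective_batch_size(cfg, "BlenderbotForCausalLM", 64))  # 64 // 8 = 8

The config file contents: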
skip:
  all:
    # Difficult to setup accuracy test because .eval() not supported
    - Reformer
    # Fails deepcopy
    - BlenderbotForConditionalGeneration
    - GPTNeoForCausalLM
    - GPTNeoForSequenceClassification
    # Fails with even batch size = 1
    - GPTJForCausalLM
    - GPTJForQuestionAnswering
    # Model too big
    - google/gemma-3-4b-it
    - openai/gpt-oss-20b
    - mistralai/Mistral-7B-Instruct-v0.3

  device:
    cpu:
      - meta-llama/Llama-3.2-1B
      - google/gemma-2-2b
      - google/gemma-3-4b-it
      - openai/whisper-tiny
      - Qwen/Qwen3-0.6B
      - mistralai/Mistral-7B-Instruct-v0.3
      - openai/gpt-oss-20b

  control_flow:
    - AllenaiLongformerBase

batch_size:
  # TODO - Fails even after fake tensors
  divisors:
    AlbertForMaskedLM: 2
    AllenaiLongformerBase: 2
    BartForCausalLM: 2
    BertForMaskedLM: 2
    BlenderbotForCausalLM: 8
    # BlenderbotForConditionalGeneration : 16
    DebertaV2ForMaskedLM: 4
    DistilBertForMaskedLM: 2
    DistillGPT2: 2
    ElectraForCausalLM: 2
    GPT2ForSequenceClassification: 2
    # GPTJForCausalLM : 2
    # GPTJForQuestionAnswering : 2
    # GPTNeoForCausalLM : 32
    # GPTNeoForSequenceClassification : 2
    GoogleFnet: 2
    LayoutLMForMaskedLM: 2
    M2M100ForConditionalGeneration: 4
    MBartForCausalLM: 2
    MT5ForConditionalGeneration: 2
    MegatronBertForCausalLM: 4
    MobileBertForMaskedLM: 2
    OPTForCausalLM: 2
    PLBartForCausalLM: 2
    PegasusForCausalLM: 4
    RobertaForCausalLM: 2
    T5ForConditionalGeneration: 2
    T5Small: 2
    TrOCRForCausalLM: 2
    XGLMForCausalLM: 4
    XLNetLMHeadModel: 2
    YituTechConvBert: 2
    meta-llama/Llama-3.2-1B: 8
    google/gemma-2-2b: 8
    google/gemma-3-4b-it: 8
    openai/whisper-tiny: 8
    Qwen/Qwen3-0.6B: 8
    mistralai/Mistral-7B-Instruct-v0.3: 8
    openai/gpt-oss-20b: 8


tolerance:
  higher_training:
    - MT5ForConditionalGeneration

  higher_max_autotune_training: []

  higher_inference:
    - GPT2ForSequenceClassification

  higher_inference_cpu:
    - GPT2ForSequenceClassification

  cosine: []


accuracy:
  skip:
    large_models:
      # Models too large to have eager, dynamo and fp64_numbers simultaneously
      # even for 40 GB machine.
      - DebertaV2ForMaskedLM
      - BlenderbotForCausalLM

    only_inference:
      # Fails with dynamo for train mode
      - M2M100ForConditionalGeneration

    only_fp32:
      - GoogleFnet
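The tolerance and accuracy sections above tag models that need looser numerical comparison or restricted accuracy checks. Below is a hedged sketch of how a driver might consult them, continuing the illustrative cfg dict from the earlier example; the function names and the returned labels are descriptive placeholders, not the harness's real flags.

def needs_higher_tolerance(cfg, model, training, device="cuda"):
    """True if the model is listed for a looser tolerance in this mode."""
    tol = cfg.get("tolerance", {})
    if training:
        return model in tol.get("higher_training", [])
    if device == "cpu" and model in tol.get("higher_inference_cpu", []):
        return True
    return model in tol.get("higher_inference", [])

def accuracy_restrictions(cfg, model):
    """Collect any accuracy-check restrictions recorded for the model."""
    skip = cfg.get("accuracy", {}).get("skip", {})
    restrictions = []
    if model in skip.get("large_models", []):
        restrictions.append("too-large-for-fp64-baseline")
    if model in skip.get("only_inference", []):
        restrictions.append("inference-only")
    if model in skip.get("only_fp32", []):
        restrictions.append("fp32-only")
    return restrictions

# e.g. accuracy_restrictions(cfg, "GoogleFnet")                              -> ["fp32-only"]
# e.g. needs_higher_tolerance(cfg, "MT5ForConditionalGeneration", True)      -> True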