Add mistral/gpt-oss to benchmarks (#163565)

Potential issues
* gpt-oss-20b is probably too big (I couldn't run it on my devserver)
* Mistral requires HF authentication
* Mistral also takes a while to run the performance checks (need to wait for CI)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/163565
Approved by: https://github.com/huydhn
This commit is contained in:
angelayi
2025-09-24 06:12:36 +00:00
committed by PyTorch MergeBot
parent 2c5a3d7e60
commit dad54ca7c0
15 changed files with 94 additions and 0 deletions

View File

@ -78,6 +78,8 @@ def check_accuracy(actual_csv, expected_csv, expected_filename):
"google/gemma-3-4b-it",
"openai/whisper-tiny",
"Qwen/Qwen3-0.6B",
"mistralai/Mistral-7B-Instruct-v0.3",
"openai/gpt-oss-20b",
}
)

View File

@ -61,6 +61,8 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename):
"google/gemma-3-4b-it",
"openai/whisper-tiny",
"Qwen/Qwen3-0.6B",
"mistralai/Mistral-7B-Instruct-v0.3",
"openai/gpt-oss-20b",
}
)

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0
Qwen/Qwen3-0.6B,pass,0
mistralai/Mistral-7B-Instruct-v0.3,pass,0
openai/gpt-oss-20b,pass,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -187,3 +187,11 @@ openai/whisper-tiny,fail_to_run,0
Qwen/Qwen3-0.6B,fail_to_run,0
mistralai/Mistral-7B-Instruct-v0.3,fail_to_run,0
openai/gpt-oss-20b,fail_to_run,0

1 name accuracy graph_breaks
187
188
189
190
191
192
193
194
195
196
197

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass_due_to_skip,0
Qwen/Qwen3-0.6B,pass_due_to_skip,0
mistralai/Mistral-7B-Instruct-v0.3,pass_due_to_skip,0
openai/gpt-oss-20b,pass_due_to_skip,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass_due_to_skip,0
Qwen/Qwen3-0.6B,pass_due_to_skip,0
mistralai/Mistral-7B-Instruct-v0.3,pass_due_to_skip,0
openai/gpt-oss-20b,pass_due_to_skip,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass_due_to_skip,0
Qwen/Qwen3-0.6B,pass_due_to_skip,0
mistralai/Mistral-7B-Instruct-v0.3,pass_due_to_skip,0
openai/gpt-oss-20b,pass_due_to_skip,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0
Qwen/Qwen3-0.6B,pass,0
mistralai/Mistral-7B-Instruct-v0.3,pass,0
openai/gpt-oss-20b,pass,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0
Qwen/Qwen3-0.6B,pass,0
mistralai/Mistral-7B-Instruct-v0.3,pass,0
openai/gpt-oss-20b,pass,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0
Qwen/Qwen3-0.6B,pass,0
mistralai/Mistral-7B-Instruct-v0.3,pass,0
openai/gpt-oss-20b,pass,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0
Qwen/Qwen3-0.6B,pass,0
mistralai/Mistral-7B-Instruct-v0.3,pass,0
openai/gpt-oss-20b,pass,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -191,3 +191,11 @@ openai/whisper-tiny,pass,0
Qwen/Qwen3-0.6B,pass,0
mistralai/Mistral-7B-Instruct-v0.3,pass,0
openai/gpt-oss-20b,pass,0

1 name accuracy graph_breaks
191
192
193
194
195
196
197
198
199
200
201

View File

@ -11,6 +11,8 @@ skip:
- GPTJForQuestionAnswering
# Model too big
- google/gemma-3-4b-it
- openai/gpt-oss-20b
- mistralai/Mistral-7B-Instruct-v0.3
device:
cpu:
@ -19,6 +21,8 @@ skip:
- google/gemma-3-4b-it
- openai/whisper-tiny
- Qwen/Qwen3-0.6B
- mistralai/Mistral-7B-Instruct-v0.3
- openai/gpt-oss-20b
control_flow:
- AllenaiLongformerBase
@ -79,6 +83,8 @@ batch_size:
google/gemma-3-4b-it: 8
openai/whisper-tiny: 8
Qwen/Qwen3-0.6B: 8
mistralai/Mistral-7B-Instruct-v0.3: 8
openai/gpt-oss-20b: 8
tolerance:

View File

@ -99,4 +99,6 @@ HF_LLM_MODELS: dict[str, Benchmark] = {
"google/gemma-3-4b-it": TextGenerationBenchmark,
"openai/whisper-tiny": WhisperBenchmark,
"Qwen/Qwen3-0.6B": TextGenerationBenchmark,
"mistralai/Mistral-7B-Instruct-v0.3": TextGenerationBenchmark,
"openai/gpt-oss-20b": TextGenerationBenchmark,
}

View File

@ -51,3 +51,5 @@ google/gemma-2-2b,8
google/gemma-3-4b-it,8
openai/whisper-tiny,8
Qwen/Qwen3-0.6B,8
mistralai/Mistral-7B-Instruct-v0.3,8
openai/gpt-oss-20b,8