Mirror of https://github.com/huggingface/transformers.git, synced 2025-10-20 17:13:56 +08:00
[GPT OSS] Refactor the tests as they were not properly checking the outputs (#40288)

* it was long overdue!
* use the official kernel
* more permissive
* update the kernel as well
* mmm should it be this?
* up pu
* fixup
* Update test_modeling_gpt_oss.py
* style
* start with 20b
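The refactor replaces the old list-of-dicts fixture with a flat mapping keyed by the run configuration, so each expected generation can be looked up directly instead of being appended to the file. Below is a minimal sketch of that lookup; the fixture path and the example configuration are illustrative (the test suite resolves its own RESULTS_PATH), and the key format mirrors the generate_config_key helper added in this commit:

    import json

    def generate_config_key(quantized, model, kernels, attn_impl, mode):
        # Same key format as the helper added in test_modeling_gpt_oss.py.
        return f"quantized={str(quantized).lower()}|model={model}|kernels={str(kernels).lower()}|attn_impl={attn_impl}|mode={mode}"

    # Illustrative path; the suite uses its own RESULTS_PATH constant.
    with open("tests/fixtures/gpt_oss/integration_tests.json") as f:
        expected_results = json.load(f)

    # e.g. "quantized=false|model=20b|kernels=false|attn_impl=eager|mode=eval"
    key = generate_config_key(False, "20b", False, "eager", "eval")
    expected_outputs = expected_results.get(key, [])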
468 tests/fixtures/gpt_oss/integration_tests.json (vendored)
@@ -1,346 +1,122 @@
[
  {"quantized": true, "model": "120b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"]},
  {"quantized": true, "model": "120b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": [".....Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"]},
  {"quantized": true, "model": "120b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": ["Did not work"]},
  {"quantized": true, "model": "120b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": ["Did not work"]},
  {"quantized": true, "model": "120b", "kernels": false, "attn_impl": "eager", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"]},
  {"quantized": true, "model": "120b", "kernels": false, "attn_impl": "eager", "mode": "train", "outputs": [".....Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"]},
  {"quantized": true, "model": "120b", "kernels": true, "attn_impl": "eager", "mode": "eval", "outputs": ["Did not work"]},
  {"quantized": true, "model": "120b", "kernels": true, "attn_impl": "eager", "mode": "train", "outputs": ["Did not work"]},
  {"quantized": true, "model": "20b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too.\n\nIt sounds like you're looking for", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": true, "model": "20b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too.\n\nIt sounds like you're looking for", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": true, "model": "20b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": ["Did not work"]},
  {"quantized": true, "model": "20b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": ["Did not work"]},
  {"quantized": true, "model": "20b", "kernels": false, "attn_impl": "eager", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too.\n\nIt sounds like you're expressing a", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": true, "model": "20b", "kernels": false, "attn_impl": "eager", "mode": "train", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too.\n\nIt sounds like you're expressing a", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": true, "model": "20b", "kernels": true, "attn_impl": "eager", "mode": "eval", "outputs": ["Did not work"]},
  {"quantized": true, "model": "20b", "kernels": true, "attn_impl": "eager", "mode": "train", "outputs": ["Did not work"]},
  {"quantized": false, "model": "120b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": [".....Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United Kingdom?\n\nThe United Kingdom does not have a president. The head of state is the"]},
  {"quantized": false, "model": "120b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": [".....Roses are red, violets are blue, I am a language model trained by OpenAI.\n\nI am a large language model", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"]},
  {"quantized": false, "model": "120b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": [".....Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United Kingdom?\n\nThe United Kingdom does not have a president. The head of state is the"]},
  {"quantized": false, "model": "120b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": [".....Roses are red, violets are blue, I am a language model trained by OpenAI.\n\nI am a large language model", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"]},
  {"quantized": false, "model": "120b", "kernels": false, "attn_impl": "eager", "mode": "eval", "outputs": [".....Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United States?\n\nAs an AI language model, I do not have personal feelings or emotions,"]},
  {"quantized": false, "model": "120b", "kernels": false, "attn_impl": "eager", "mode": "train", "outputs": [".....Roses are red, violets are blue, I am a language model, and I can help you with your request.\n\nSure", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"]},
  {"quantized": false, "model": "120b", "kernels": true, "attn_impl": "eager", "mode": "eval", "outputs": [".....Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United States?\n\nAs an AI language model, I do not have personal feelings or emotions,"]},
  {"quantized": false, "model": "120b", "kernels": true, "attn_impl": "eager", "mode": "train", "outputs": [".....Roses are red, violets are blue, I am a language model, and I can help you with your request.\n\nSure", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"]},
  {"quantized": false, "model": "20b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": false, "model": "20b", "kernels": false, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": [".....Roses are red, violets are blue\" (makes sense). But the phrase \"the answer is 3\" is not a", "How are you? Tell me the name of the president of the United States.\" The answer to that is \"Joe Biden.\" The user is asking for the name"]},
  {"quantized": false, "model": "20b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": false, "model": "20b", "kernels": true, "attn_impl": "ft-hf-o-c/vllm-flash-attn3", "mode": "train", "outputs": [".....Roses are red, violets are blue\" (makes sense). But the phrase \"the answer is 3\" is not a", "How are you? Tell me the name of the president of the United States.\" The answer to that is \"Joe Biden.\" The user is asking for the name"]},
  {"quantized": false, "model": "20b", "kernels": false, "attn_impl": "eager", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": false, "model": "20b", "kernels": false, "attn_impl": "eager", "mode": "train", "outputs": [".....Roses are red, violets are blue.\" -> from which we can derive a rule: if we have a red object that is", "How are you? Tell me the name of the president of the United States.\n\nI am an AI language model and I do not have a personal life or"]},
  {"quantized": false, "model": "20b", "kernels": true, "attn_impl": "eager", "mode": "eval", "outputs": [".....Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"]},
  {"quantized": false, "model": "20b", "kernels": true, "attn_impl": "eager", "mode": "train", "outputs": [".....Roses are red, violets are blue.\" -> from which we can derive a rule: if we have a red object that is", "How are you? Tell me the name of the president of the United States.\n\nI am an AI language model and I do not have a personal life or"]}
]
{
  "quantized=true|model=120b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"],
  "quantized=true|model=120b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"],
  "quantized=true|model=120b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Did not work"],
  "quantized=true|model=120b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Did not work"],
  "quantized=true|model=120b|kernels=false|attn_impl=eager|mode=eval": ["Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"],
  "quantized=true|model=120b|kernels=false|attn_impl=eager|mode=train": ["Roses are red, violets are blue, I am a language model, and I can help you too!\n\nSure! Here", "How are you? Tell me the name of the president of the United\n\nHello! As of my last update in November 2023, the President of the"],
  "quantized=true|model=120b|kernels=true|attn_impl=eager|mode=eval": ["Did not work"],
  "quantized=true|model=120b|kernels=true|attn_impl=eager|mode=train": ["Did not work"],
  "quantized=true|model=20b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Roses are red, violets are blue, I love you, and I love you too.\nIt sounds like you're looking for", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=true|model=20b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Roses are red, violets are blue, I love you, and I love you too.\n\nIt sounds like you're looking for", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=true|model=20b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Did not work"],
  "quantized=true|model=20b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Did not work"],
  "quantized=true|model=20b|kernels=false|attn_impl=eager|mode=eval": ["Roses are red, violets are blue, I love you, and I love you too.\n\nIt sounds like you're expressing a", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=true|model=20b|kernels=false|attn_impl=eager|mode=train": ["Roses are red, violets are blue, I love you, and I love you too.\n\nIt sounds like you're expressing a", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=true|model=20b|kernels=true|attn_impl=eager|mode=eval": ["Did not work"],
  "quantized=true|model=20b|kernels=true|attn_impl=eager|mode=train": ["Did not work"],
  "quantized=false|model=120b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United Kingdom?\n\nThe United Kingdom does not have a president. The head of state is the"],
  "quantized=false|model=120b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Roses are red, violets are blue, I am a language model trained by OpenAI.\n\nI am a large language model", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"],
  "quantized=false|model=120b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United Kingdom?\n\nThe United Kingdom does not have a president. The head of state is the"],
  "quantized=false|model=120b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Roses are red, violets are blue, I am a language model trained by OpenAI.\n\nI am a large language model", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"],
  "quantized=false|model=120b|kernels=false|attn_impl=eager|mode=eval": ["Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United States?\n\nAs an AI language model, I do not have personal feelings or emotions,"],
  "quantized=false|model=120b|kernels=false|attn_impl=eager|mode=train": ["Roses are red, violets are blue, I am a language model, and I can help you with your request.\n\nSure", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"],
  "quantized=false|model=120b|kernels=true|attn_impl=eager|mode=eval": ["Roses are red, violets are blue,\nI am a language model, not a human being.\n```\n\nThis poem is a", "How are you? Tell me the name of the president of the United States?\n\nAs an AI language model, I do not have personal feelings or emotions,"],
  "quantized=false|model=120b|kernels=true|attn_impl=eager|mode=train": ["Roses are red, violets are blue, I am a language model, and I can help you with your request.\n\nSure", "How are you? Tell me the name of the president of the United\n\nHello! I'm an AI language model, so I don't have feelings, but I'm here"],
  "quantized=false|model=20b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=false|model=20b|kernels=false|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Roses are red, violets are blue\" (makes sense). But the phrase \"the answer is 3\" is not a", "How are you? Tell me the name of the president of the United States.\" The answer to that is \"Joe Biden.\" The user is asking for the name"],
  "quantized=false|model=20b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=eval": ["Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=false|model=20b|kernels=true|attn_impl=kernels-community/vllm-flash-attn3|mode=train": ["Roses are red, violets are blue\" (makes sense). But the phrase \"the answer is 3\" is not a", "How are you? Tell me the name of the president of the United States.\" The answer to that is \"Joe Biden.\" The user is asking for the name"],
  "quantized=false|model=20b|kernels=false|attn_impl=eager|mode=eval": ["Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=false|model=20b|kernels=false|attn_impl=eager|mode=train": ["Roses are red, violets are blue.\" -> from which we can derive a rule: if we have a red object that is", "How are you? Tell me the name of the president of the United States.\n\nI am an AI language model and I do not have a personal life or"],
  "quantized=false|model=20b|kernels=true|attn_impl=eager|mode=eval": ["Roses are red, violets are blue, I love you, and I love you too!\n\nRoses are red, vio", "How are you? Tell me the name of the president of the United States.\" The assistant should respond with the name of the president. The user is asking for"],
  "quantized=false|model=20b|kernels=true|attn_impl=eager|mode=train": ["Roses are red, violets are blue.\" -> from which we can derive a rule: if we have a red object that is", "How are you? Tell me the name of the president of the United States.\n\nI am an AI language model and I do not have a personal life or"]
}
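For reference, the old entries map onto the new keys mechanically: each record's fields become one lookup key, the retired ft-hf-o-c kernel repo is replaced by the official kernels-community one, and the stray leading dots are dropped from the expected strings. A hypothetical one-off conversion, not part of this commit (file paths are illustrative):

    import json

    with open("old_integration_tests.json") as f:  # illustrative input path
        old_entries = json.load(f)

    new_results = {}
    for e in old_entries:
        # Swap in the official kernel repo name used by the refactored tests.
        attn = e["attn_impl"].replace("ft-hf-o-c/", "kernels-community/")
        key = (
            f"quantized={str(e['quantized']).lower()}|model={e['model']}|"
            f"kernels={str(e['kernels']).lower()}|attn_impl={attn}|mode={e['mode']}"
        )
        # Drop the leading dots that prefixed the old expected strings.
        new_results[key] = [s.lstrip(".") for s in e["outputs"]]

    with open("integration_tests.json", "w") as f:  # illustrative output path
        json.dump(new_results, f, indent=2)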
@@ -13,6 +13,7 @@
# limitations under the License.
"""Testing suite for the PyTorch GptOss model."""

import difflib
import inspect
import json
import os
@@ -194,6 +195,10 @@ def distributed_worker(quantized, model_size, kernels, attn_impl, mode):
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from transformers.testing_utils import torch_device

    def generate_config_key(quantized, model, kernels, attn_impl, mode):
        """Generate a key for the restructured integration test results."""
        return f"quantized={str(quantized).lower()}|model={model}|kernels={str(kernels).lower()}|attn_impl={attn_impl}|mode={mode}"

    input_text = [
        "Roses are red, violets",
        "How are you? Tell me the name of the president of",
@@ -204,7 +209,7 @@ def distributed_worker(quantized, model_size, kernels, attn_impl, mode):
    kernels = kernels.lower() == "true"

    # Distributed model loading
    model_id = f"/fsx/vb/new-oai/gpt-oss-{model_size}-trfs"
    model_id = f"openai/gpt-oss-{model_size}"
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype="auto",
@@ -219,26 +224,54 @@ def distributed_worker(quantized, model_size, kernels, attn_impl, mode):
    output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
    output_texts = tokenizer.batch_decode(output, skip_special_tokens=False)

    # Only rank 0 writes results
    # Only rank 0 writes results and validates against expected outputs
    if int(os.environ.get("RANK", "0")) == 0:
        result_entry = {
            "quantized": quantized,
            "model": model_size,
            "kernels": kernels,
            "attn_impl": attn_impl,
            "mode": mode,
            "outputs": output_texts,
        }
        # Generate key to look up expected outputs
        key = generate_config_key(quantized, model_size, kernels, attn_impl, mode)

        # Load expected outputs from restructured JSON
        if os.path.exists(RESULTS_PATH):
            with open(RESULTS_PATH, "r") as f:
                results = json.load(f)
        else:
            results = []
        results.append(result_entry)
                expected_results = json.load(f)

        with open(RESULTS_PATH, "w") as f:
            json.dump(results, f, indent=2)
            # Check if we have expected results for this configuration
            if key in expected_results:
                expected_outputs = expected_results[key]

                # Compare actual outputs with expected outputs
                assert len(output_texts) == len(expected_outputs), f"Output length mismatch for {key}"

                for i, (actual, expected) in enumerate(zip(output_texts, expected_outputs)):
                    actual_stripped = actual.strip()
                    expected_stripped = expected.strip()

                    # Make lengths match by taking minimum length to be resilient to generation differences
                    min_length = min(len(actual_stripped), len(expected_stripped))
                    actual_truncated = actual_stripped[:min_length]
                    expected_truncated = expected_stripped[:min_length]

                    if actual_truncated != expected_truncated:
                        diff = "\n".join(
                            difflib.unified_diff(
                                expected_truncated.splitlines(keepends=True),
                                actual_truncated.splitlines(keepends=True),
                                fromfile=f"expected[{i}]",
                                tofile=f"actual[{i}]",
                                lineterm="",
                            )
                        )
                        raise AssertionError(
                            f"Output mismatch at index {i} for {key}:\n"
                            f"Expected: '{expected_stripped}'\n"
                            f"Actual: '{actual_stripped}'\n"
                            f"Diff (truncated to min length {min_length}):\n{diff}"
                        )

                print(f"✓ Outputs match expected results for {key}")
            else:
                print(f"Warning: No expected results found for configuration: {key}")
        else:
            print(f"Warning: Results file {RESULTS_PATH} not found")


@slow
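The comparison above is deliberately permissive: both strings are stripped, truncated to the shorter length, and only then compared, with a unified diff emitted on mismatch, so extra trailing tokens on either side do not fail the check. The same logic, distilled into a standalone helper for clarity (a sketch; the helper name and signature are not part of the test file):

    import difflib

    def assert_prefixes_match(actual: str, expected: str, label: str = "output") -> None:
        # Strip, then truncate both sides to the shorter length before comparing.
        actual_s, expected_s = actual.strip(), expected.strip()
        n = min(len(actual_s), len(expected_s))
        if actual_s[:n] != expected_s[:n]:
            diff = "\n".join(
                difflib.unified_diff(
                    expected_s[:n].splitlines(keepends=True),
                    actual_s[:n].splitlines(keepends=True),
                    fromfile=f"expected[{label}]",
                    tofile=f"actual[{label}]",
                    lineterm="",
                )
            )
            raise AssertionError(f"Mismatch for {label} (compared first {n} chars):\n{diff}")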
@@ -249,6 +282,11 @@ class GptOssIntegrationTest(unittest.TestCase):
        "How are you? Tell me the name of the president of",
    ]

    @staticmethod
    def generate_config_key(quantized, model, kernels, attn_impl, mode):
        """Generate a key for the restructured integration test results."""
        return f"quantized={str(quantized).lower()}|model={model}|kernels={str(kernels).lower()}|attn_impl={attn_impl}|mode={mode}"

    def setUp(self):
        cleanup(torch_device, gc_collect=True)
@@ -271,7 +309,7 @@ class GptOssIntegrationTest(unittest.TestCase):

        inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(model.device)
        output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
        return output_text

    # ------------------------
@@ -320,38 +358,38 @@ if __name__ == "__main__":
    # ------------------------
    # Shared parameterization
    # ------------------------
    PARAMETERS = [
        (False, "120b", False, "eager", "eval"),
        (False, "120b", False, "eager", "train"),
        (False, "120b", False, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (False, "120b", False, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (False, "120b", True, "eager", "eval"),
        (False, "120b", True, "eager", "train"),
        (False, "120b", True, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (False, "120b", True, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (True, "120b", False, "eager", "eval"),
        (True, "120b", False, "eager", "train"),
        (True, "120b", False, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (True, "120b", False, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (True, "120b", True, "eager", "eval"),
        (True, "120b", True, "eager", "train"),
        (True, "120b", True, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (True, "120b", True, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (False, "20b", False, "eager", "eval"),
        (False, "20b", False, "eager", "train"),
        (False, "20b", False, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (False, "20b", False, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (False, "20b", False, "kernels-community/vllm-flash-attn3", "eval"),
        (False, "20b", False, "kernels-community/vllm-flash-attn3", "train"),
        (False, "20b", True, "eager", "eval"),
        (False, "20b", True, "eager", "train"),
        (False, "20b", True, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (False, "20b", True, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (False, "20b", True, "kernels-community/vllm-flash-attn3", "eval"),
        (False, "20b", True, "kernels-community/vllm-flash-attn3", "train"),
        (True, "20b", False, "eager", "eval"),
        (True, "20b", False, "eager", "train"),
        (True, "20b", False, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (True, "20b", False, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (True, "20b", False, "kernels-community/vllm-flash-attn3", "eval"),
        (True, "20b", False, "kernels-community/vllm-flash-attn3", "train"),
        (True, "20b", True, "eager", "eval"),
        (True, "20b", True, "eager", "train"),
        (True, "20b", True, "ft-hf-o-c/vllm-flash-attn3", "eval"),
        (True, "20b", True, "ft-hf-o-c/vllm-flash-attn3", "train"),
        (True, "20b", True, "kernels-community/vllm-flash-attn3", "eval"),
        (True, "20b", True, "kernels-community/vllm-flash-attn3", "train"),
        (False, "120b", False, "eager", "eval"),
        (False, "120b", False, "eager", "train"),
        (False, "120b", False, "kernels-community/vllm-flash-attn3", "eval"),
        (False, "120b", False, "kernels-community/vllm-flash-attn3", "train"),
        (False, "120b", True, "eager", "eval"),
        (False, "120b", True, "eager", "train"),
        (False, "120b", True, "kernels-community/vllm-flash-attn3", "eval"),
        (False, "120b", True, "kernels-community/vllm-flash-attn3", "train"),
        (True, "120b", False, "eager", "eval"),
        (True, "120b", False, "eager", "train"),
        (True, "120b", False, "kernels-community/vllm-flash-attn3", "eval"),
        (True, "120b", False, "kernels-community/vllm-flash-attn3", "train"),
        (True, "120b", True, "eager", "eval"),
        (True, "120b", True, "eager", "train"),
        (True, "120b", True, "kernels-community/vllm-flash-attn3", "eval"),
        (True, "120b", True, "kernels-community/vllm-flash-attn3", "train"),
    ]

    # ------------------------
@@ -360,7 +398,7 @@ if __name__ == "__main__":
    @parameterized.expand(PARAMETERS)
    @require_read_token
    def test_model_outputs(self, quantized, model, kernels, attn_impl, mode):
        model_id = f"/fsx/vb/new-oai/gpt-oss-{model}-trfs"
        model_id = f"openai/gpt-oss-{model}"
        output_texts = self.load_and_forward(
            model_id,
            attn_impl,
@@ -368,23 +406,50 @@ if __name__ == "__main__":
            use_kernels=kernels,
        )

        result_entry = {
            "quantized": quantized,
            "model": model,
            "kernels": kernels,
            "attn_impl": attn_impl,
            "mode": mode,
            "outputs": output_texts,
        }
        # Generate key to look up expected outputs
        key = self.generate_config_key(quantized, model, kernels, attn_impl, mode)

        # Load expected outputs from restructured JSON
        if os.path.exists(RESULTS_PATH):
            with open(RESULTS_PATH, "r") as f:
                results = json.load(f)
        else:
            results = []
        results.append(result_entry)
        with open(RESULTS_PATH, "w") as f:
            json.dump(results, f, indent=2)
                expected_results = json.load(f)

            # Check if we have expected results for this configuration
            if key in expected_results:
                expected_outputs = expected_results[key]

                # Compare actual outputs with expected outputs
                self.assertEqual(len(output_texts), len(expected_outputs), f"Output length mismatch for {key}")

                for i, (actual, expected) in enumerate(zip(output_texts, expected_outputs)):
                    actual_stripped = actual.strip()
                    expected_stripped = expected.strip()

                    # Make lengths match by taking minimum length to be resilient to generation differences
                    min_length = min(len(actual_stripped), len(expected_stripped))
                    actual_truncated = actual_stripped[:min_length]
                    expected_truncated = expected_stripped[:min_length]

                    if actual_truncated != expected_truncated:
                        diff = "\n".join(
                            difflib.unified_diff(
                                expected_truncated.splitlines(keepends=True),
                                actual_truncated.splitlines(keepends=True),
                                fromfile=f"expected[{i}]",
                                tofile=f"actual[{i}]",
                                lineterm="",
                            )
                        )
                        self.fail(
                            f"Output mismatch at index {i} for {key}:\n"
                            f"Expected: '{expected_stripped}'\n"
                            f"Actual: '{actual_stripped}'\n"
                            f"Diff (truncated to min length {min_length}):\n{diff}"
                        )
            else:
                # If no expected results exist, this is a new configuration
                # We could optionally add it to the results file here
                print(f"Warning: No expected results found for configuration: {key}")

        self.assertIsInstance(output_texts, list)
        self.assertTrue(all(isinstance(x, str) for x in output_texts))
@@ -409,7 +474,7 @@ if __name__ == "__main__":
        if quantized:
            self.skipTest("Training test for quantized models is not supported.")

        model_id = f"/fsx/vb/new-oai/gpt-oss-{model}-trfs"
        model_id = f"openai/gpt-oss-{model}"

        model_obj = AutoModelForCausalLM.from_pretrained(
            model_id,
@@ -471,7 +536,7 @@ if __name__ == "__main__":
            ]
        )

        model_id = "/fsx/vb/new-oai/gpt-oss-20b-trfs"
        model_id = "openai/gpt-oss-20b"

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
@@ -537,7 +602,7 @@ I am a language model, not a human being"""
            ]
        )

        model_id = "/fsx/vb/new-oai/gpt-oss-120b-trfs"
        model_id = "openai/gpt-oss-120b"

        model = AutoModelForCausalLM.from_pretrained(
            model_id,