huggingface/transformers (https://github.com/huggingface/transformers.git) — Compare commits: 3c7552f733...fix_a10_ci (4 commits)

Commits:
578a3d8c11
9c4e899938
ef72b838e3
e62543f6df
@@ -13,7 +13,6 @@
# limitations under the License.
"""Testing suite for the PyTorch Aria model."""

import gc
import unittest

import requests
@@ -32,7 +31,7 @@ from transformers import (
from transformers.models.idefics3 import Idefics3VisionConfig
from transformers.testing_utils import (
    Expectations,
    backend_empty_cache,
    cleanup,
    require_bitsandbytes,
    require_torch,
    require_torch_large_accelerator,
@@ -252,14 +251,23 @@ class AriaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTesterMi
        pass


SKIP = False
torch_accelerator_module = getattr(torch, torch_device)
memory = 23  # skip on T4 / A10
if hasattr(torch_accelerator_module, "get_device_properties"):
    if torch_accelerator_module.get_device_properties(0).total_memory / 1024**3 < memory:
        SKIP = True


@unittest.skipIf(SKIP, reason="A10 doesn't have enough GPU memory for these tests")
@require_torch
class AriaForConditionalGenerationIntegrationTest(unittest.TestCase):
    def setUp(self):
        self.processor = AutoProcessor.from_pretrained("rhymes-ai/Aria")
        cleanup(torch_device, gc_collect=True)

    def tearDown(self):
        gc.collect()
        backend_empty_cache(torch_device)
        cleanup(torch_device, gc_collect=True)

    @slow
    @require_torch_large_accelerator
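Note: the module-level SKIP gate in the hunk above disables the whole integration class when the accelerator has too little memory. A minimal standalone sketch of the same pattern, assuming a CUDA device (the names and the 23 GiB threshold are illustrative, not the repository's helpers):

import unittest

import torch

SKIP_LOW_MEMORY = False
if torch.cuda.is_available():
    # total_memory is reported in bytes; convert to GiB
    total_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
    SKIP_LOW_MEMORY = total_gib < 23  # e.g. T4 (~15 GiB) and A10 (~22 GiB) fall below this


@unittest.skipIf(SKIP_LOW_MEMORY, reason="Not enough GPU memory for these tests")
class BigModelIntegrationTest(unittest.TestCase):
    def test_generate(self):
        ...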
@@ -165,7 +165,8 @@ class Cohere2IntegrationTest(unittest.TestCase):
        EXPECTED_TEXTS = Expectations(
            {
                ("xpu", 3): ["<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
                ("cuda", 7): ["<BOS_TOKEN>Hello I am doing a project for a school assignment and I need to create a website for a fictional company. I have", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n",],
                (None, None): ["<BOS_TOKEN>Hello I am doing a project for a school assignment and I need to create a website for a fictional company. I have", "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
                ("cuda", 8): ['<BOS_TOKEN>Hello I am doing a project for my school and I need to create a website for a fictional company. I have the', "<PAD><PAD><BOS_TOKEN>Hi today I'm going to show you how to make a simple and easy to make a chocolate cake.\n"],
            }
        )
        EXPECTED_TEXT = EXPECTED_TEXTS.get_expectation()
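Note: Expectations maps (device_type, major_version) keys to per-device expected outputs, and get_expectation() returns the entry for the hardware the test is running on. Roughly, and glossing over the scoring details of the real testing_utils helper, the lookup behaves like this sketch (the function below is illustrative, not the transformers implementation):

from typing import Any


def pick_expectation(expectations: dict[tuple, Any], device_type: str, major: int | None) -> Any:
    # Exact (device, major) match wins, then a device-only entry, then the (None, None) default.
    for key in ((device_type, major), (device_type, None), (None, None)):
        if key in expectations:
            return expectations[key]
    raise KeyError("no matching expectation")


# e.g. on an A100 (compute capability 8.x) this selects the ("cuda", 8) entry,
# while unrecognised hardware falls back to (None, None).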
@@ -238,7 +239,8 @@ class Cohere2IntegrationTest(unittest.TestCase):
        EXPECTED_TEXT_COMPLETIONS = Expectations(
            {
                ("xpu", 3): ["Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-"],
                ("cuda", 7): ["Hello I am doing a project on the effects of social media on mental health. I have a few questions. 1. What is the relationship",],
                (None, None): ["Hello I am doing a project on the effects of social media on mental health. I have a few questions. 1. What is the relationship"],
                ("cuda", 8): ['Hello I am doing a project for a friend and I am stuck on a few things. I have a 2004 Ford F-'],
            }
        )
        EXPECTED_TEXT_COMPLETION = EXPECTED_TEXT_COMPLETIONS.get_expectation()
@@ -290,24 +292,31 @@ class Cohere2IntegrationTest(unittest.TestCase):
        if attn_implementation == "flash_attention_2" and not is_flash_attn_2_available():
            self.skipTest("FlashAttention2 is required for this test.")

        # TODO: can we specify not to compile when `flex` attention is used?
        if attn_implementation == "flex_attention":
            self.skipTest(
                "Flex attention will compile (see `compile_friendly_flex_attention`), which causes a triton issue."
            )

        if torch_device == "xpu" and attn_implementation == "flash_attention_2":
            self.skipTest(reason="Intel XPU doesn't support flash_attention_2 as of now.")

        model_id = "CohereForAI/c4ai-command-r7b-12-2024"
        EXPECTED_COMPLETIONS = [
            " the mountains, the lakes, the rivers, the waterfalls, the waterfalls, the waterfalls, the waterfalls",
            " the mountains, the lakes, the rivers, the forests, the trees, the birds, the animals",
            ", green, yellow, orange, purple, pink, brown, black, white, grey, silver",
        ]

        input_text = [
            "This is a nice place. " * 800 + "I really enjoy the scenery,",  # This is larger than 4096 tokens
            "This is a nice place. " * 200 + "I really enjoy the scenery,",  # This is larger than 1024 tokens
            "A list of colors: red, blue",  # This will almost all be padding tokens
        ]
        tokenizer = AutoTokenizer.from_pretrained(model_id, padding="left")
        inputs = tokenizer(input_text, padding=True, return_tensors="pt").to(torch_device)

        # We use `sliding_window=1024` instead of the original value `4096` in the config to avoid GPU OOM
        model = AutoModelForCausalLM.from_pretrained(
            model_id, attn_implementation=attn_implementation, torch_dtype=torch.float16
            model_id, attn_implementation=attn_implementation, torch_dtype=torch.float16, sliding_window=1024
        ).to(torch_device)

        # Make sure prefill is larger than sliding window
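Note: the sliding_window=1024 override works because from_pretrained forwards unused keyword arguments to the model config, so a checkpoint value can be overridden at load time. A small sketch of that idea, reusing the model id from the hunk above (the override value is the one used in the test):

import torch
from transformers import AutoModelForCausalLM

# Extra kwargs that match config fields override the checkpoint's values.
model = AutoModelForCausalLM.from_pretrained(
    "CohereForAI/c4ai-command-r7b-12-2024",
    torch_dtype=torch.float16,
    sliding_window=1024,  # checkpoint default is 4096; a smaller window reduces attention memory
)
print(model.config.sliding_window)  # -> 1024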
@@ -211,6 +211,12 @@ class Data2VecVisionModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(
        reason="Will fix only if requested by the community: it fails with `torch._dynamo.exc.InternalTorchDynamoError: IndexError: list index out of range`. Without compile, the test passes."
    )
    def test_sdpa_can_compile_dynamic(self):
        pass

    @unittest.skip(reason="Data2VecVision does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass
@@ -220,6 +220,10 @@ class DepthProModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="Inductor error: name 'OpaqueUnaryFn_log2' is not defined")
    def test_sdpa_can_compile_dynamic(self):
        pass

    @unittest.skip(reason="DepthPro does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass
@@ -115,9 +115,12 @@ class GemmaIntegrationTest(unittest.TestCase):
    def setUpClass(cls):
        cls.device_properties = get_device_properties()

    def setUp(self):
        cleanup(torch_device, gc_collect=True)

    def tearDown(self):
        # See LlamaIntegrationTest.tearDown(). Can be removed once LlamaIntegrationTest.tearDown() is removed.
        cleanup(torch_device, gc_collect=False)
        cleanup(torch_device, gc_collect=True)

    @require_read_token
    def test_model_2b_fp16(self):
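Note: cleanup(torch_device, gc_collect=True) is the shared testing_utils helper for releasing accelerator memory between tests; conceptually it runs Python garbage collection and then empties the backend cache. A simplified sketch of what such a helper does (illustrative only, not the actual implementation):

import gc

import torch


def cleanup_sketch(device: str, gc_collect: bool = False) -> None:
    # Drop dangling Python references first, then free cached accelerator memory.
    if gc_collect:
        gc.collect()
    if device == "cuda" and torch.cuda.is_available():
        torch.cuda.empty_cache()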
@@ -276,7 +279,7 @@ class GemmaIntegrationTest(unittest.TestCase):
        EXPECTED_TEXTS = Expectations(
            {
                ("cuda", 7): ["""Hello I am doing a project on a 1991 240sx and I am trying to find""", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",],
                ("cuda", 8): ["Hello I am doing a project for my school and I am trying to make a program that will read a .txt file", "Hi today I am going to show you how to make a very simple and easy to make a very simple and",],
                ("cuda", 8): ['Hello I am doing a project for my school and I am trying to make a game in which you have to get a', 'Hi today I am going to show you how to make a very simple and easy to make a very simple and'],
                ("rocm", 9): ["Hello I am doing a project for my school and I am trying to get a servo to move a certain amount of degrees", "Hi today I am going to show you how to make a very simple and easy to make DIY light up sign",],
            }
        )
@@ -298,10 +301,20 @@ class GemmaIntegrationTest(unittest.TestCase):
            self.skipTest("This test is failing (`torch.compile` fails) on Nvidia T4 GPU (OOM).")

        model_id = "google/gemma-7b"
        EXPECTED_TEXTS = [
            """Hello I am doing a project on a 1999 4.0L 4x4. I""",
            "Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
        ]

        expectations = Expectations(
            {
                (None, None): [
                    "Hello I am doing a project on a 1999 4.0L 4x4. I",
                    "Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
                ],
                ("cuda", 8): [
                    "Hello I am doing a project on a 1995 3000gt SL. I have a",
                    "Hi today I am going to show you how to make a simple and easy to make a DIY 3D",
                ],
            }
        )
        EXPECTED_TEXTS = expectations.get_expectation()

        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)
@@ -317,10 +330,20 @@ class GemmaIntegrationTest(unittest.TestCase):
    @require_read_token
    def test_model_7b_4bit(self):
        model_id = "google/gemma-7b"
        EXPECTED_TEXTS = [
            "Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
            "Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very",
        ]

        expectations = Expectations(
            {
                (None, None): [
                    "Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
                    "Hi today I am going to talk about the best way to get rid of acne. miniaturing is a very",
                ],
                ("cuda", 8): [
                    "Hello I am doing a project for my school and I am trying to make a program that will take a number and then",
                    'Hi today I am going to talk about the new update for the game called "The new update!:)!:)!:)',
                ],
            }
        )
        EXPECTED_TEXTS = expectations.get_expectation()

        model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
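Note: load_in_4bit=True is the bitsandbytes shortcut; it is equivalent to passing an explicit quantization config, which is the more verbose but more controllable spelling. A hedged sketch (requires the bitsandbytes package and a CUDA GPU; the parameter values are illustrative):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for matmuls on the dequantized weights
)
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", quantization_config=quant_config)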
@@ -382,9 +405,19 @@ class GemmaIntegrationTest(unittest.TestCase):
        )

        tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", pad_token="</s>", padding_side="right")
        EXPECTED_TEXT_COMPLETION = [
            "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found",
        ]

        expectations = Expectations(
            {
                (None, None): [
                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
                ],
                ("cuda", 8): [
                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have been looking on the internet and I have"
                ],
            }
        )
        EXPECTED_TEXT_COMPLETION = expectations.get_expectation()

        max_generation_length = tokenizer(EXPECTED_TEXT_COMPLETION, return_tensors="pt", padding=True)[
            "input_ids"
        ].shape[-1]
@@ -432,15 +465,38 @@ class GemmaIntegrationTest(unittest.TestCase):
            exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
        )
        ep_generated_text = tokenizer.batch_decode(ep_generated_ids, skip_special_tokens=True)

        # After switching to A10 on 2025/06/29, we get slightly different outputs when using export
        expectations = Expectations(
            {
                (None, None): [
                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
                ],
                ("cuda", 8): [
                    "Hello I am doing a project on the 1990s and I need to know what the most popular music was in the 1990s. I have looked on the internet and I have found"
                ],
            }
        )
        EXPECTED_TEXT_COMPLETION = expectations.get_expectation()

        self.assertEqual(EXPECTED_TEXT_COMPLETION, ep_generated_text)

    def test_model_2b_bf16_dola(self):
        model_id = "google/gemma-2b"
        # ground truth text generated with dola_layers="low", repetition_penalty=1.2
        EXPECTED_TEXTS = [
            "Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale",
            "Hi today we have the review for a <strong>2016/2017</strong> season of",
        ]
        expectations = Expectations(
            {
                (None, None): [
                    "Hello I am doing an experiment and need to get the mass of a block. The problem is, it has no scale",
                    "Hi today we have the review for a <strong>2016/2017</strong> season of",
                ],
                ("cuda", 8): [
                    "Hello I am doing an experiment and need to get the mass of a block. The only tool I have is a scale",
                    "Hi today we have the review for a <strong>2016/2017</strong> season of",
                ],
            }
        )
        EXPECTED_TEXTS = expectations.get_expectation()

        model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(torch_device)
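Note: the DoLa test above compares against ground truth generated with dola_layers="low" and repetition_penalty=1.2, which map directly to generate() arguments. A short usage sketch (prompt and token count are illustrative):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", torch_dtype=torch.bfloat16)

inputs = tokenizer(["Hello I am doing"], return_tensors="pt")
# DoLa contrasts the final layer's logits with lower ("low") layers; the repetition
# penalty matches the setting used to produce the expected strings above.
output_ids = model.generate(
    **inputs, max_new_tokens=20, do_sample=False, dola_layers="low", repetition_penalty=1.2
)
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True))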