|
|
@ -785,7 +785,10 @@ class Gemma3nIntegrationTest(unittest.TestCase):
|
|
|
|
|
|
|
|
|
|
|
|
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
|
|
|
|
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
EXPECTED_TEXTS = ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'] # fmt: skip
|
|
|
|
EXPECTED_TEXTS = Expectations({
|
|
|
|
|
|
|
|
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
|
|
|
|
|
|
|
|
("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The sky is blue with a few white clouds. The'],
|
|
|
|
|
|
|
|
}).get_expectation() # fmt: skip
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
|
|
|
|
|
|
|
|
def test_model_with_audio(self):
|
|
|
|
def test_model_with_audio(self):
|
|
|
@ -866,18 +869,11 @@ class Gemma3nIntegrationTest(unittest.TestCase):
|
|
|
|
|
|
|
|
|
|
|
|
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
|
|
|
|
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
|
|
|
|
EXPECTED_TEXTS = Expectations({
|
|
|
|
# fmt: off
|
|
|
|
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
|
|
|
|
EXPECTATIONS = Expectations(
|
|
|
|
("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject Matter:** The first image shows a"],
|
|
|
|
{
|
|
|
|
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The cow is facing the viewer with its head slightly turned', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
|
|
|
|
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
|
|
|
|
}).get_expectation() # fmt: skip
|
|
|
|
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The cow is facing the viewer with its head slightly turned', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
|
|
|
|
|
|
|
|
# fmt: on
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
|
|
|
|
|
|
|
|
def test_model_4b_image(self):
|
|
|
|
def test_model_4b_image(self):
|
|
|
@ -899,18 +895,11 @@ class Gemma3nIntegrationTest(unittest.TestCase):
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
EXPECTED_NUM_IMAGES = 1 # Gemma3n does not support crops
|
|
|
|
EXPECTED_NUM_IMAGES = 1 # Gemma3n does not support crops
|
|
|
|
|
|
|
|
EXPECTED_TEXTS = Expectations({
|
|
|
|
# fmt: off
|
|
|
|
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
|
|
|
|
EXPECTATIONS = Expectations(
|
|
|
|
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
|
|
|
|
{
|
|
|
|
("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The sky is blue with a few white clouds. The'],
|
|
|
|
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
|
|
|
|
}).get_expectation() # fmt: skip
|
|
|
|
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
|
|
|
|
|
|
|
|
# fmt: on
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.assertEqual(len(inputs["pixel_values"]), EXPECTED_NUM_IMAGES)
|
|
|
|
self.assertEqual(len(inputs["pixel_values"]), EXPECTED_NUM_IMAGES)
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
|
|
|
|
|
|
|
@ -948,17 +937,11 @@ class Gemma3nIntegrationTest(unittest.TestCase):
|
|
|
|
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
|
|
|
|
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
# fmt: off
|
|
|
|
EXPECTED_TEXTS = Expectations({
|
|
|
|
EXPECTATIONS = Expectations(
|
|
|
|
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are some key elements:\n\n* **A prominent red'],
|
|
|
|
{
|
|
|
|
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are the key elements:\n\n* **A prominent red'],
|
|
|
|
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are some key elements:\n\n* **A prominent red'],
|
|
|
|
("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. \n\nHere are some key elements:\n\n* **A'],
|
|
|
|
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are the key elements:\n\n* **A prominent red'],
|
|
|
|
}).get_expectation() # fmt: skip
|
|
|
|
}
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
|
|
|
|
|
|
|
|
# fmt: on
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
self.assertEqual(output_text, EXPECTED_TEXTS)
|
|
|
|
|
|
|
|
|
|
|
|
@unittest.skip("For now, using a gemma model with the 3n class is not supported")
|
|
|
|
@unittest.skip("For now, using a gemma model with the 3n class is not supported")
|
|
|
@ -1034,15 +1017,10 @@ class Gemma3nIntegrationTest(unittest.TestCase):
|
|
|
|
]
|
|
|
|
]
|
|
|
|
output_text = tokenizer.batch_decode(out)
|
|
|
|
output_text = tokenizer.batch_decode(out)
|
|
|
|
|
|
|
|
|
|
|
|
# fmt: off
|
|
|
|
EXPECTED_COMPLETIONS = Expectations({
|
|
|
|
EXPECTATIONS = Expectations(
|
|
|
|
# FIXME: This test is VERY flaky on ROCm
|
|
|
|
{
|
|
|
|
("cuda", None): [" and I am glad to be here. This is a nice place. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
|
|
|
|
("cuda", None): [" and I am glad to be here. This is a nice place. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
|
|
|
|
("rocm", (9, 4)): [' and I think it makes this place special. This is a nice place. This is a nice place', ', green, yellow, purple, orange, pink, brown, black, white.\n\nHere are'],
|
|
|
|
("xpu", None): [" and I think it is very nice. I think it is nice. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
|
|
|
|
("xpu", None): [" and I think it is very nice. I think it is nice. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
|
|
|
|
}
|
|
|
|
}).get_expectation() # fmt: skip
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
EXPECTED_COMPLETIONS = EXPECTATIONS.get_expectation()
|
|
|
|
|
|
|
|
# fmt: on
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.assertEqual(output_text, EXPECTED_COMPLETIONS)
|
|
|
|
self.assertEqual(output_text, EXPECTED_COMPLETIONS)
|
|
|
|