Extend Gemma3n integration unit-test cases to XPU (#41071)

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
This commit is contained in:
Yao Matrix
2025-09-25 06:46:37 -07:00
committed by GitHub
parent 05fb90c969
commit 1dd22a234c

View File

@ -37,10 +37,12 @@ from transformers import (
is_torch_available,
)
from transformers.testing_utils import (
Expectations,
cleanup,
require_deterministic_for_xpu,
require_read_token,
require_torch,
require_torch_gpu,
require_torch_accelerator,
set_config_for_less_flaky_test,
set_model_for_less_flaky_test,
slow,
@ -217,8 +219,6 @@ class Gemma3nAudioModelTest(ModelTesterMixin, unittest.TestCase):
self.assertEqual(input_features.shape, self.expected_input_features_shape)
np.testing.assert_allclose(input_features[0, 0, :5], self.expected_input_features_slice, rtol=1e-5, atol=1e-5)
print(input_features[0, 0, :5])
input_features_mask = audio_inputs["input_features_mask"]
self.assertEqual(input_features_mask.shape, self.expected_input_features_mask_shape)
# The second audio sample is shorter (22 frames vs 48), so its mask should become False at index 22
@ -235,8 +235,6 @@ class Gemma3nAudioModelTest(ModelTesterMixin, unittest.TestCase):
with torch.no_grad():
encoder_output, encoder_mask = model(**inputs_dict)
print(encoder_output[0, 0, :5])
# Check output encodings
self.assertEqual(encoder_output.shape, self.expected_encoder_output_shape)
torch.testing.assert_close(
@ -745,7 +743,7 @@ class Gemma3nVision2TextModelTest(ModelTesterMixin, GenerationTesterMixin, unitt
@slow
@require_torch_gpu
@require_torch_accelerator
@require_read_token
class Gemma3nIntegrationTest(unittest.TestCase):
def setUp(self):
@ -766,7 +764,7 @@ class Gemma3nIntegrationTest(unittest.TestCase):
audio_ds = load_dataset(
"etechgrid/28.5k_wavfiles_dataset", "default", data_files="wav_dataset/103-1240-0000.wav"
)
self.audio_file_path = audio_ds["train"][0]["audio"].metadata.path
self.audio_file_path = audio_ds["train"][0]["audio"]["path"]
cleanup(torch_device, gc_collect=True)
def tearDown(self):
@ -869,7 +867,17 @@ class Gemma3nIntegrationTest(unittest.TestCase):
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
EXPECTED_TEXTS = ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"] # fmt: skip
# fmt: off
EXPECTATIONS = Expectations(
{
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The cow is facing the viewer with its head slightly turned', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
}
)
EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
# fmt: on
self.assertEqual(output_text, EXPECTED_TEXTS)
def test_model_4b_image(self):
@ -891,10 +899,22 @@ class Gemma3nIntegrationTest(unittest.TestCase):
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
EXPECTED_NUM_IMAGES = 1 # Gemma3n does not support crops
EXPECTED_TEXTS = ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'] # fmt: skip
# fmt: off
EXPECTATIONS = Expectations(
{
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
}
)
EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
# fmt: on
self.assertEqual(len(inputs["pixel_values"]), EXPECTED_NUM_IMAGES)
self.assertEqual(output_text, EXPECTED_TEXTS)
@require_deterministic_for_xpu
def test_model_4b_multiimage(self):
model_id = "Google/gemma-3n-E4B-it"
@ -928,7 +948,17 @@ class Gemma3nIntegrationTest(unittest.TestCase):
output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
output_text = self.processor.batch_decode(output, skip_special_tokens=True)
EXPECTED_TEXTS = ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are some key elements:\n\n* **A prominent red'] # fmt: skip
# fmt: off
EXPECTATIONS = Expectations(
{
("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are some key elements:\n\n* **A prominent red'],
("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are the key elements:\n\n* **A prominent red'],
}
)
EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
# fmt: on
self.assertEqual(output_text, EXPECTED_TEXTS)
@unittest.skip("For now, using a gemma model with the 3n class is not supported")
@ -978,6 +1008,7 @@ class Gemma3nIntegrationTest(unittest.TestCase):
EXPECTED_COMPLETIONS = [" and I think it's a nice place to visit. This is a nice place. This is", ", green, yellow, orange, purple, pink, brown, black, white.\n\nHere'"] # fmt: skip
self.assertEqual(output_text, EXPECTED_COMPLETIONS)
@require_deterministic_for_xpu
def test_generation_beyond_sliding_window_with_generation_config(self):
"""Same as `test_generation_beyond_sliding_window`, but passing a GenerationConfig. Regression test for #36684 --
ensures `cache_implementation='hybrid'` is correctly inherited from the base `model.generation_config`.
@ -1003,5 +1034,15 @@ class Gemma3nIntegrationTest(unittest.TestCase):
]
output_text = tokenizer.batch_decode(out)
EXPECTED_COMPLETIONS = [" and I am glad to be here. This is a nice place. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"] # fmt: skip
# fmt: off
EXPECTATIONS = Expectations(
{
("cuda", None): [" and I am glad to be here. This is a nice place. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
("xpu", None): [" and I think it is very nice. I think it is nice. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
}
)
EXPECTED_COMPLETIONS = EXPECTATIONS.get_expectation()
# fmt: on
self.assertEqual(output_text, EXPECTED_COMPLETIONS)