Enable FA in AMD docker (#41069)

* Add FA to docker

* Use caching mechanism for qwen2_5

* Fix a typo in important models list

* Partial fixes for gemma3

* Added a commit ID for FA repo

* Detailed the expectation storage format

* Rebase fix

* Apply style fixes

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Rémi Ouazan authored on 2025-09-26 13:57:58 +02:00, committed by GitHub
parent 10f6891fc5
commit 50d2448a1a
5 changed files with 41 additions and 51 deletions


@@ -35,3 +35,10 @@ RUN python3 -m pip uninstall -y kernels
 # On ROCm, torchcodec is required to decode audio files and 0.4 or 0.6 fails
 RUN python3 -m pip install --no-cache-dir "torchcodec==0.5"
+
+# Install flash attention from source. Tested with commit 6387433156558135a998d5568a9d74c1778666d8
+RUN git clone https://github.com/ROCm/flash-attention/ -b tridao && \
+    cd flash-attention && \
+    GPU_ARCHS="gfx942" python setup.py install
+
+RUN python3 -m pip install --no-cache-dir einops
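As a quick sanity check that the source-built wheel is the one the image ends up with, something along these lines can be run inside the container (a minimal sketch; it assumes the ROCm fork installs under the usual flash_attn package name):

    # Sketch: verify the flash-attention build is importable and visible to transformers.
    import flash_attn
    from transformers.utils import is_flash_attn_2_available

    print("flash_attn version:", flash_attn.__version__)
    print("transformers detects FA2:", is_flash_attn_2_available())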


@@ -3244,7 +3244,9 @@ def unpack_device_properties(
 class Expectations(UserDict[PackedDeviceProperties, Any]):
     def get_expectation(self) -> Any:
         """
-        Find best matching expectation based on environment device properties.
+        Find best matching expectation based on environment device properties. We look at device_type, major and minor
+        versions of the drivers. Expectations are stored as a dictionary with keys of the form
+        (device_type, (major, minor)). If the major and minor versions are not provided, we use None.
         """
         return self.find_expectation(get_device_properties())
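For reference, the storage format the new docstring describes is the same one the updated tests below rely on: keys are (device_type, (major, minor)) tuples, with None standing in for an unconstrained version. A minimal usage sketch mirroring those keys (the values are placeholders, not taken from any real test):

    from transformers.testing_utils import Expectations

    expected = Expectations({
        ("cuda", None): "value for any CUDA device",               # no version constraint
        ("rocm", (9, 4)): "value for ROCm devices reporting 9.4",   # specific major/minor
        ("xpu", None): "value for any XPU device",
    }).get_expectation()  # resolves to the best match for the current environment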


@@ -785,7 +785,9 @@ class Gemma3nIntegrationTest(unittest.TestCase):
         output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
         output_text = self.processor.batch_decode(output, skip_special_tokens=True)

-        EXPECTED_TEXTS = ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly']  # fmt: skip
+        EXPECTED_TEXTS = Expectations({
+            ("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
+            ("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The sky is blue with a few white clouds. The'],
+        }).get_expectation()  # fmt: skip
         self.assertEqual(output_text, EXPECTED_TEXTS)

     def test_model_with_audio(self):
@@ -866,18 +869,11 @@ class Gemma3nIntegrationTest(unittest.TestCase):
         output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
         output_text = self.processor.batch_decode(output, skip_special_tokens=True)

-        # fmt: off
-        EXPECTATIONS = Expectations(
-            {
-                ("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
-                ("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The cow is facing the viewer with its head slightly turned', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
-            }
-        )
-        EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
-        # fmt: on
+        EXPECTED_TEXTS = Expectations({
+            ("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
+            ("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject Matter:** The first image shows a"],
+            ("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The cow is facing the viewer with its head slightly turned', "user\nYou are a helpful assistant.\n\n\n\n\n\n\n\n\n\nAre these images identical?\nmodel\nNo, the images are not identical. \n\nHere's a breakdown of the differences:\n\n* **Subject:** The first image features a cow"],
+        }).get_expectation()  # fmt: skip

         self.assertEqual(output_text, EXPECTED_TEXTS)

     def test_model_4b_image(self):
@@ -899,18 +895,11 @@ class Gemma3nIntegrationTest(unittest.TestCase):
         output_text = self.processor.batch_decode(output, skip_special_tokens=True)

         EXPECTED_NUM_IMAGES = 1  # Gemma3n does not support crops
-        # fmt: off
-        EXPECTATIONS = Expectations(
-            {
-                ("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
-                ("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
-            }
-        )
-        EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
-        # fmt: on
+        EXPECTED_TEXTS = Expectations({
+            ("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
+            ("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a clear blue ocean. The cow is facing the viewer with its head slightly'],
+            ("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat is shown in this image?\nmodel\nThe image shows a brown and white cow standing on a sandy beach next to a turquoise ocean. The sky is blue with a few white clouds. The'],
+        }).get_expectation()  # fmt: skip

         self.assertEqual(len(inputs["pixel_values"]), EXPECTED_NUM_IMAGES)
         self.assertEqual(output_text, EXPECTED_TEXTS)
@@ -948,17 +937,11 @@ class Gemma3nIntegrationTest(unittest.TestCase):
         output = model.generate(**inputs, max_new_tokens=30, do_sample=False)
         output_text = self.processor.batch_decode(output, skip_special_tokens=True)

-        # fmt: off
-        EXPECTATIONS = Expectations(
-            {
-                ("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are some key elements:\n\n* **A prominent red'],
-                ("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are the key elements:\n\n* **A prominent red'],
-            }
-        )
-        EXPECTED_TEXTS = EXPECTATIONS.get_expectation()
-        # fmt: on
+        EXPECTED_TEXTS = Expectations({
+            ("cuda", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are some key elements:\n\n* **A prominent red'],
+            ("xpu", None): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. Here are the key elements:\n\n* **A prominent red'],
+            ("rocm", (9, 4)): ['user\nYou are a helpful assistant.\n\n\n\n\n\nWhat do you see here?\nmodel\nIn the image, I see a street scene in what appears to be a Chinatown district. \n\nHere are some key elements:\n\n* **A'],
+        }).get_expectation()  # fmt: skip

         self.assertEqual(output_text, EXPECTED_TEXTS)

     @unittest.skip("For now, using a gemma model with the 3n class is not supported")
@@ -1034,15 +1017,10 @@ class Gemma3nIntegrationTest(unittest.TestCase):
        ]
        output_text = tokenizer.batch_decode(out)

-        # fmt: off
-        EXPECTATIONS = Expectations(
-            {
-                ("cuda", None): [" and I am glad to be here. This is a nice place. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
-                ("xpu", None): [" and I think it is very nice. I think it is nice. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
-            }
-        )
-        EXPECTED_COMPLETIONS = EXPECTATIONS.get_expectation()
-        # fmt: on
+        EXPECTED_COMPLETIONS = Expectations({
+            # FIXME: This test is VERY flaky on ROCm
+            ("cuda", None): [" and I am glad to be here. This is a nice place. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
+            ("rocm", (9, 4)): [' and I think it makes this place special. This is a nice place. This is a nice place', ', green, yellow, purple, orange, pink, brown, black, white.\n\nHere are'],
+            ("xpu", None): [" and I think it is very nice. I think it is nice. This is a nice place.", ", green, yellow, purple, orange, pink, brown, black, white.\n\nHere are"],
+        }).get_expectation()  # fmt: skip

         self.assertEqual(output_text, EXPECTED_COMPLETIONS)
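For context on the ("rocm", (9, 4)) keys introduced above: they presumably target the gfx942 parts the Dockerfile builds flash-attention for, since that is the compute capability PyTorch reports for those GPUs. A small sketch of how to check which key the current machine would match (assumes one visible accelerator; the actual matching logic lives in Expectations.find_expectation):

    import torch

    # Print the (device_type, (major, minor)) pair this machine would contribute
    # to an Expectations lookup, e.g. ("rocm", (9, 4)) on gfx942 / MI300-class GPUs.
    device_type = "rocm" if torch.version.hip is not None else "cuda"
    major, minor = torch.cuda.get_device_capability()
    print((device_type, (major, minor)))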


@@ -27,6 +27,7 @@ from transformers import (
     is_torch_available,
     is_vision_available,
 )
+from transformers.image_utils import load_image
 from transformers.testing_utils import (
     Expectations,
     cleanup,
@@ -47,6 +48,7 @@ from ...test_modeling_common import (
     floats_tensor,
     ids_tensor,
 )
+from ...test_processing_common import url_to_local_path

 if is_cv2_available():
@@ -454,8 +456,8 @@ class Qwen2_5_VLIntegrationTest(unittest.TestCase):
                ],
            }
        ]
-        url = "https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2-VL/demo_small.jpg"
-        self.image = Image.open(requests.get(url, stream=True).raw)
+        img_url = url_to_local_path("https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2-VL/demo_small.jpg")
+        self.image = load_image(img_url).convert("RGB")

         cleanup(torch_device, gc_collect=True)
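Going by the commit message ("Use caching mechanism for qwen2_5"), the intent is to stop re-downloading the demo image on every run: url_to_local_path from the shared processing-test helpers maps the URL to a locally cached file, and load_image from transformers.image_utils then opens it. A hedged sketch of the resulting pattern outside the test class (the caching behaviour is inferred from the helper's name; load_image itself accepts plain URLs or local paths):

    from transformers.image_utils import load_image

    # load_image returns a PIL.Image from a URL or a local path; in the test the
    # URL is first resolved to a cached local file so the network is hit at most once.
    img_url = "https://qianwen-res.oss-accelerate-overseas.aliyuncs.com/Qwen2-VL/demo_small.jpg"
    image = load_image(img_url).convert("RGB")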


@@ -5,7 +5,8 @@ IMPORTANT_MODELS = [
     "gpt2",
     "t5",
     "modernbert",
-    "vit,clip",
+    "vit",
+    "clip",
     "detr",
     "table_transformer",
     "got_ocr2",