[Bugfix] Fixed the issue of not being able to input video and image simultaneously (#15387)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
Chauncey
2025-03-25 11:48:08 +08:00
committed by GitHub
parent b5269db959
commit 10b34e36b9

View File

@@ -556,11 +556,11 @@ class MultiModalItemTracker(BaseMultiModalItemTracker[object]):
raise ValueError(\
"Only one message can have {'type': 'image_embeds'}")
mm_inputs["image"] = image_embeds_lst[0]
-elif "image" in items_by_modality:
+if "image" in items_by_modality:
mm_inputs["image"] = items_by_modality["image"] # A list of images
-elif "audio" in items_by_modality:
+if "audio" in items_by_modality:
mm_inputs["audio"] = items_by_modality["audio"] # A list of audios
-elif "video" in items_by_modality:
+if "video" in items_by_modality:
mm_inputs["video"] = items_by_modality["video"] # A list of videos
return mm_inputs
@@ -589,11 +589,11 @@ class AsyncMultiModalItemTracker(BaseMultiModalItemTracker[Awaitable[object]]):
raise ValueError(
"Only one message can have {'type': 'image_embeds'}")
mm_inputs["image"] = image_embeds_lst[0]
-elif "image" in items_by_modality:
+if "image" in items_by_modality:
mm_inputs["image"] = items_by_modality["image"] # A list of images
-elif "audio" in items_by_modality:
+if "audio" in items_by_modality:
mm_inputs["audio"] = items_by_modality["audio"] # A list of audios
-elif "video" in items_by_modality:
+if "video" in items_by_modality:
mm_inputs["video"] = items_by_modality["video"] # A list of videos
return mm_inputs