Mirror of https://github.com/vllm-project/vllm.git (synced 2025-10-20 14:53:52 +08:00)
[Bugfix] Fix MRoPE Errors in the Qwen-VL Model When Processing Pure Text (#18407)
Co-authored-by: 松灵 <wpf272043@alibaba-inc.com>
This commit is contained in:
@@ -729,8 +729,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
|
||||
mm_kwargs, placeholder_maps = MultiModalPlaceholderMap.from_seq_group(
|
||||
seq_group_metadata,
|
||||
range(positions[0], positions[0] + len(positions)))
|
||||
if not mm_kwargs:
|
||||
return
|
||||
|
||||
inter_data.multi_modal_kwargs = mm_kwargs
|
||||
inter_data.multi_modal_placeholder_maps = placeholder_maps
|
||||
@@ -741,12 +739,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
|
||||
video_grid_thw = mm_kwargs.get("video_grid_thw", None)
|
||||
audio_feature_lengths = mm_kwargs.get("audio_feature_lengths",
|
||||
None)
|
||||
assert (
|
||||
image_grid_thw is not None or video_grid_thw is not None
|
||||
or audio_feature_lengths is not None), (
|
||||
"mrope embedding type requires multi-modal input mapper "
|
||||
"returns 'image_grid_thw' or 'video_grid_thw' or "
|
||||
"'audio_feature_lengths'.")
|
||||
|
||||
second_per_grid_ts = mm_kwargs.get("second_per_grid_ts", None)
|
||||
use_audio_in_video = mm_kwargs.get("use_audio_in_video", False)
|
||||
|
Reference in New Issue
Block a user