mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Bugfix] validate urls object for multimodal content parts (#16990)
Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com>
This commit is contained in:
committed by
GitHub
parent
83d933718c
commit
36fe78769f
@ -104,6 +104,35 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
|
||||
assert message.content is not None and len(message.content) >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("audio_url", [TEST_AUDIO_URLS[0]])
|
||||
async def test_error_on_invalid_audio_url_type(client: openai.AsyncOpenAI,
|
||||
model_name: str,
|
||||
audio_url: str):
|
||||
messages = [{
|
||||
"role":
|
||||
"user",
|
||||
"content": [
|
||||
{
|
||||
"type": "audio_url",
|
||||
"audio_url": audio_url
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What's happening in this audio?"
|
||||
},
|
||||
],
|
||||
}]
|
||||
|
||||
# audio_url should be a dict {"url": "some url"}, not directly a string
|
||||
with pytest.raises(openai.BadRequestError):
|
||||
_ = await client.chat.completions.create(model=model_name,
|
||||
messages=messages,
|
||||
max_completion_tokens=10,
|
||||
temperature=0.0)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("audio_url", [TEST_AUDIO_URLS[0]])
|
||||
|
@ -108,6 +108,35 @@ async def test_single_chat_session_video(client: openai.AsyncOpenAI,
|
||||
assert message.content is not None and len(message.content) >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
|
||||
async def test_error_on_invalid_video_url_type(client: openai.AsyncOpenAI,
|
||||
model_name: str,
|
||||
video_url: str):
|
||||
messages = [{
|
||||
"role":
|
||||
"user",
|
||||
"content": [
|
||||
{
|
||||
"type": "video_url",
|
||||
"video_url": video_url
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What's in this video?"
|
||||
},
|
||||
],
|
||||
}]
|
||||
|
||||
# video_url should be a dict {"url": "some url"}, not directly a string
|
||||
with pytest.raises(openai.BadRequestError):
|
||||
_ = await client.chat.completions.create(model=model_name,
|
||||
messages=messages,
|
||||
max_completion_tokens=10,
|
||||
temperature=0.0)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
|
||||
|
@ -137,6 +137,36 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
|
||||
assert message.content is not None and len(message.content) >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
|
||||
async def test_error_on_invalid_image_url_type(client: openai.AsyncOpenAI,
|
||||
model_name: str,
|
||||
image_url: str):
|
||||
content_text = "What's in this image?"
|
||||
messages = [{
|
||||
"role":
|
||||
"user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": image_url
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": content_text
|
||||
},
|
||||
],
|
||||
}]
|
||||
|
||||
# image_url should be a dict {"url": "some url"}, not directly a string
|
||||
with pytest.raises(openai.BadRequestError):
|
||||
_ = await client.chat.completions.create(model=model_name,
|
||||
messages=messages,
|
||||
max_completion_tokens=10,
|
||||
temperature=0.0)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
|
||||
|
@ -27,10 +27,11 @@ from openai.types.chat import (ChatCompletionMessageToolCallParam,
|
||||
ChatCompletionToolMessageParam)
|
||||
from openai.types.chat.chat_completion_content_part_input_audio_param import (
|
||||
InputAudio)
|
||||
from pydantic import TypeAdapter
|
||||
# yapf: enable
|
||||
# pydantic needs the TypedDict from typing_extensions
|
||||
from transformers import (PreTrainedTokenizer, PreTrainedTokenizerFast,
|
||||
ProcessorMixin)
|
||||
# pydantic needs the TypedDict from typing_extensions
|
||||
from typing_extensions import Required, TypeAlias, TypedDict
|
||||
|
||||
from vllm.config import ModelConfig
|
||||
@ -879,12 +880,13 @@ def _get_full_multimodal_text_prompt(placeholder_counts: dict[str, int],
|
||||
|
||||
# No need to validate using Pydantic again
|
||||
_TextParser = partial(cast, ChatCompletionContentPartTextParam)
|
||||
_ImageParser = partial(cast, ChatCompletionContentPartImageParam)
|
||||
_ImageEmbedsParser = partial(cast, ChatCompletionContentPartImageEmbedsParam)
|
||||
_AudioParser = partial(cast, ChatCompletionContentPartAudioParam)
|
||||
_InputAudioParser = partial(cast, ChatCompletionContentPartInputAudioParam)
|
||||
_RefusalParser = partial(cast, ChatCompletionContentPartRefusalParam)
|
||||
_VideoParser = partial(cast, ChatCompletionContentPartVideoParam)
|
||||
# Need to validate url objects
|
||||
_ImageParser = TypeAdapter(ChatCompletionContentPartImageParam).validate_python
|
||||
_AudioParser = TypeAdapter(ChatCompletionContentPartAudioParam).validate_python
|
||||
_VideoParser = TypeAdapter(ChatCompletionContentPartVideoParam).validate_python
|
||||
|
||||
_ContentPart: TypeAlias = Union[str, dict[str, str], InputAudio]
|
||||
|
||||
|
Reference in New Issue
Block a user