Mirror of https://github.com/vllm-project/vllm.git, synced 2025-10-20 23:03:52 +08:00
[Misc] Update openai client example file for multimodal (#25795)
Signed-off-by: Roger Wang <hey@rogerw.io>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
@@ -38,11 +38,13 @@ client = OpenAI(
     base_url=openai_api_base,
 )
 
+headers = {"User-Agent": "vLLM Example Client"}
+
 
 def encode_base64_content_from_url(content_url: str) -> str:
     """Encode a content retrieved from a remote url to base64 format."""
 
-    with requests.get(content_url) as response:
+    with requests.get(content_url, headers=headers) as response:
         response.raise_for_status()
         result = base64.b64encode(response.content).decode("utf-8")
 
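The net effect of this hunk, as a standalone sketch (imports and header value taken from the diff; Wikimedia and similar hosts tend to reject requests that carry no User-Agent, which is presumably why the example now sends one):

import base64

import requests

headers = {"User-Agent": "vLLM Example Client"}


def encode_base64_content_from_url(content_url: str) -> str:
    """Encode a content retrieved from a remote url to base64 format."""
    # Identify the client explicitly; some CDNs refuse anonymous requests.
    with requests.get(content_url, headers=headers) as response:
        response.raise_for_status()
        # Raw bytes -> base64 -> ASCII string, ready for a data: URL.
        return base64.b64encode(response.content).decode("utf-8")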
@@ -50,19 +52,19 @@ def encode_base64_content_from_url(content_url: str) -> str:
 
 
 # Text-only inference
-def run_text_only(model: str) -> None:
+def run_text_only(model: str, max_completion_tokens: int) -> None:
     chat_completion = client.chat.completions.create(
         messages=[{"role": "user", "content": "What's the capital of France?"}],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion.choices[0].message.content
-    print("Chat completion output:", result)
+    print("Chat completion output:\n", result)
 
 
 # Single-image input inference
-def run_single_image(model: str) -> None:
+def run_single_image(model: str, max_completion_tokens: int) -> None:
     ## Use image url in the payload
     image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
     chat_completion_from_url = client.chat.completions.create(
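A minimal runnable sketch of the pattern this hunk establishes, assuming a vLLM OpenAI-compatible server on localhost:8000 (the real script builds client from module-level constants not shown in this hunk):

from openai import OpenAI

# Assumed endpoint; vLLM's OpenAI-compatible server defaults to port 8000.
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")


def run_text_only(model: str, max_completion_tokens: int) -> None:
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": "What's the capital of France?"}],
        model=model,
        # Previously hardcoded to 64; now supplied by the caller.
        max_completion_tokens=max_completion_tokens,
    )
    print("Chat completion output:\n", chat_completion.choices[0].message.content)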
@@ -79,11 +81,11 @@ def run_single_image(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output from image url:", result)
+    print("Chat completion output from image url:\n", result)
 
     ## Use base64 encoded image in the payload
     image_base64 = encode_base64_content_from_url(image_url)
@@ -101,7 +103,7 @@ def run_single_image(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
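The message body elided between these hunks follows the standard OpenAI vision payload shape; roughly this, reusing client, model, image_base64, and max_completion_tokens from the surrounding function (the prompt text is illustrative, not from the diff):

chat_completion_from_base64 = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {
                    "type": "image_url",
                    # Base64 content is wrapped in a data: URL.
                    "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                },
            ],
        }
    ],
    model=model,
    max_completion_tokens=max_completion_tokens,
)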
@@ -109,7 +111,7 @@ def run_single_image(model: str) -> None:
 
 
 # Multi-image input inference
-def run_multi_image(model: str) -> None:
+def run_multi_image(model: str, max_completion_tokens: int) -> None:
     image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
     image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
     chat_completion_from_url = client.chat.completions.create(
@@ -130,15 +132,15 @@ def run_multi_image(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output:", result)
+    print("Chat completion output:\n", result)
 
 
 # Video input inference
-def run_video(model: str) -> None:
+def run_video(model: str, max_completion_tokens: int) -> None:
     video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
     video_base64 = encode_base64_content_from_url(video_url)
 
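For reference, a multi-image request simply stacks several image_url parts in one user message; a sketch using the two URLs from the diff (prompt text assumed, other names from the surrounding function):

chat_completion_from_url = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What animals are in these images?"},
                # Multiple image parts in one message are sent as one request.
                {"type": "image_url", "image_url": {"url": image_url_duck}},
                {"type": "image_url", "image_url": {"url": image_url_lion}},
            ],
        }
    ],
    model=model,
    max_completion_tokens=max_completion_tokens,
)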
@@ -157,11 +159,11 @@ def run_video(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output from image url:", result)
+    print("Chat completion output from video url:\n", result)
 
     ## Use base64 encoded video in the payload
     chat_completion_from_base64 = client.chat.completions.create(
@@ -178,15 +180,15 @@ def run_video(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from base64 encoded image:", result)
+    print("Chat completion output from base64 encoded video:\n", result)
 
 
 # Audio input inference
-def run_audio(model: str) -> None:
+def run_audio(model: str, max_completion_tokens: int) -> None:
     from vllm.assets.audio import AudioAsset
 
     audio_url = AudioAsset("winning_call").url
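vLLM extends the OpenAI content schema with a video_url part type; the URL variant of the request elided here looks roughly like this (prompt text assumed, other names from the surrounding function):

chat_completion_from_url = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this video?"},
                # "video_url" is a vLLM extension to the OpenAI schema.
                {"type": "video_url", "video_url": {"url": video_url}},
            ],
        }
    ],
    model=model,
    max_completion_tokens=max_completion_tokens,
)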
@@ -211,11 +213,11 @@ def run_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from input audio:", result)
+    print("Chat completion output from input audio:\n", result)
 
     # HTTP URL
     chat_completion_from_url = client.chat.completions.create(
@@ -235,11 +237,11 @@ def run_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_url.choices[0].message.content
-    print("Chat completion output from audio url:", result)
+    print("Chat completion output from audio url:\n", result)
 
     # base64 URL
     chat_completion_from_base64 = client.chat.completions.create(
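The two audio variants named in the comments map to two part types: OpenAI's input_audio (base64 payload plus a format field) and vLLM's audio_url extension. A sketch of the base64 variant (prompt text and wav format assumed, other names from the surrounding function):

chat_completion_from_base64 = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's happening in this audio clip?"},
                {
                    "type": "input_audio",
                    # OpenAI-style part: raw base64 plus an explicit format.
                    "input_audio": {"data": audio_base64, "format": "wav"},
                },
            ],
        }
    ],
    model=model,
    max_completion_tokens=max_completion_tokens,
)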
@@ -259,14 +261,14 @@ def run_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
     )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from base64 encoded audio:", result)
+    print("Chat completion output from base64 encoded audio:\n", result)
 
 
-def run_multi_audio(model: str) -> None:
+def run_multi_audio(model: str, max_completion_tokens: int) -> None:
     from vllm.assets.audio import AudioAsset
 
     # Two different audios to showcase batched inference.
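run_multi_audio sends more than one clip in a single request, which the server only accepts when launched with a raised per-prompt media limit. A sketch of the content list (variable names and prompt assumed; the exact limit-flag syntax varies by vLLM version):

# The serving side needs something like:
#   --limit-mm-per-prompt '{"audio": 2}'
# (assumed flag spelling; check the vLLM version in use).
content = [
    {"type": "text", "text": "What do these two audio clips have in common?"},
    {"type": "input_audio", "input_audio": {"data": audio_base64_1, "format": "wav"}},
    {"type": "input_audio", "input_audio": {"data": audio_base64_2, "format": "wav"}},
]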
@@ -300,11 +302,11 @@ def run_multi_audio(model: str) -> None:
             }
         ],
         model=model,
-        max_completion_tokens=64,
+        max_completion_tokens=max_completion_tokens,
    )
 
     result = chat_completion_from_base64.choices[0].message.content
-    print("Chat completion output from input audio:", result)
+    print("Chat completion output from input audio:\n", result)
 
 
 example_function_map = {
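The dispatch table this hunk ends at maps each --chat-type choice to a runner; because every runner now shares the (model, max_completion_tokens) signature, main can call them uniformly. The keys below are assumed from the function names, not shown in the diff:

example_function_map = {
    "text-only": run_text_only,
    "single-image": run_single_image,
    "multi-image": run_multi_image,
    "video": run_video,
    "audio": run_audio,
    "multi-audio": run_multi_audio,
}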
@@ -330,13 +332,20 @@ def parse_args():
         choices=list(example_function_map.keys()),
         help="Conversation type with multimodal data.",
     )
+    parser.add_argument(
+        "--max-completion-tokens",
+        "-n",
+        type=int,
+        default=128,
+        help="Maximum number of tokens to generate for each completion.",
+    )
     return parser.parse_args()
 
 
 def main(args) -> None:
     chat_type = args.chat_type
     model = get_first_model(client)
-    example_function_map[chat_type](model)
+    example_function_map[chat_type](model, args.max_completion_tokens)
 
 
 if __name__ == "__main__":
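With the new flag, the token cap is set per run instead of by editing the old hardcoded 64 (note the default also moves to 128). Assuming the script name from the vLLM examples directory, an invocation looks like:

python openai_chat_completion_client_for_multimodal.py --chat-type single-image -n 256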