[Feature][Responses API] Stream Function Call - harmony (#24317)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
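For orientation before the hunks: a minimal sketch of how a client could consume the streamed function-call events this change emits. The base_url, api_key, and host below are placeholders for a locally served vLLM instance; the event names and fields are the ones exercised by the tests in this diff.

import asyncio
import json

from openai import AsyncOpenAI

WEATHER_TOOL = {  # same shape as GET_WEATHER_SCHEMA in the first hunk
    "type": "function",
    "name": "get_weather",
    "description": "Get current temperature for provided coordinates in celsius.",
    "parameters": {
        "type": "object",
        "properties": {
            "latitude": {"type": "number"},
            "longitude": {"type": "number"},
        },
        "required": ["latitude", "longitude"],
        "additionalProperties": False,
    },
    "strict": True,
}


async def main():
    # Hypothetical local endpoint; any OpenAI-compatible vLLM server works.
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    stream = await client.responses.create(
        model="openai/gpt-oss-20b",
        input=[{"role": "user", "content": "What's the weather like in Paris today?"}],
        tools=[WEATHER_TOOL],
        stream=True,
    )
    async for event in stream:
        if event.type == "response.function_call_arguments.delta":
            print("partial arguments:", event.delta)  # argument JSON arrives in chunks
        elif event.type == "response.function_call_arguments.done":
            print("call:", event.name, json.loads(event.arguments))


asyncio.run(main())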
@@ -16,6 +16,22 @@ from ...utils import RemoteOpenAIServer

 MODEL_NAME = "openai/gpt-oss-20b"

+GET_WEATHER_SCHEMA = {
+    "type": "function",
+    "name": "get_weather",
+    "description": "Get current temperature for provided coordinates in celsius.",  # noqa
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "latitude": {"type": "number"},
+            "longitude": {"type": "number"},
+        },
+        "required": ["latitude", "longitude"],
+        "additionalProperties": False,
+    },
+    "strict": True,
+}
+

 @pytest.fixture(scope="module")
 def server():
@@ -305,6 +321,54 @@ async def test_streaming_types(client: OpenAI, model_name: str):
     assert len(stack_of_event_types) == 0


+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling_with_streaming_types(client: OpenAI, model_name: str):
+    # this links the "done" type with the "start" type
+    # so every "done" type should have a corresponding "start" type
+    # and every open block should be closed by the end of the stream
+    pairs_of_event_types = {
+        "response.completed": "response.created",
+        "response.output_item.done": "response.output_item.added",
+        "response.output_text.done": "response.output_text.delta",
+        "response.reasoning_text.done": "response.reasoning_text.delta",
+        "response.reasoning_part.done": "response.reasoning_part.added",
+        "response.function_call_arguments.done": "response.function_call_arguments.delta",  # noqa
+    }
+
+    tools = [GET_WEATHER_SCHEMA]
+    input_list = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Paris today?",
+        }
+    ]
+    stream_response = await client.responses.create(
+        model=model_name,
+        input=input_list,
+        tools=tools,
+        stream=True,
+    )
+
+    stack_of_event_types = []
+    async for event in stream_response:
+        if event.type == "response.created":
+            stack_of_event_types.append(event.type)
+        elif event.type == "response.completed":
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+        if event.type.endswith("added"):
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("delta"):
+            if stack_of_event_types[-1] == event.type:
+                continue
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("done"):
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+    assert len(stack_of_event_types) == 0
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 @pytest.mark.parametrize("background", [True, False])
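Read against the pairing table above, a single function-calling turn should nest its events roughly like the hand-worked trace below. This is an illustration inferred from the pairs, not a captured transcript; the reasoning block appears because gpt-oss emits analysis before the tool call.

# response.created
#   response.output_item.added                 (reasoning item)
#     response.reasoning_part.added
#       response.reasoning_text.delta ...      (repeated deltas are coalesced)
#       response.reasoning_text.done
#     response.reasoning_part.done
#   response.output_item.done
#   response.output_item.added                 (function_call item)
#     response.function_call_arguments.delta ...
#     response.function_call_arguments.done
#   response.output_item.done
# response.completed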
@@ -483,23 +547,7 @@ def call_function(name, args):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]

     response = await client.responses.create(
         model=model_name,
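The hunk header above references call_function, whose body lies outside this diff. For context, a plausible minimal dispatcher consistent with how the tests invoke it (result = call_function(tool_call.name, args)); this is a hedged guess at the helper, not the file's actual code:

def call_function(name, args):
    # Dispatch a parsed tool call by name; only get_weather is used in these tests.
    if name == "get_weather":
        return get_weather(**args)  # assumed helper returning a temperature
    raise ValueError(f"unknown tool: {name}")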
@@ -565,21 +613,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
             },
             "strict": True,
         },
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        },
+        GET_WEATHER_SCHEMA,
     ]

     response = await client.responses.create(
@@ -643,23 +677,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling_required(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]

     with pytest.raises(BadRequestError):
         await client.responses.create(
@@ -689,23 +707,7 @@ async def test_system_message_with_tools(client: OpenAI, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling_full_history(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]

     input_messages = [
         {"role": "user", "content": "What's the weather like in Paris today?"}
@@ -745,6 +747,74 @@ async def test_function_calling_full_history(client: OpenAI, model_name: str):
     assert response_2.output_text is not None


+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling_with_stream(client: OpenAI, model_name: str):
+    tools = [GET_WEATHER_SCHEMA]
+    input_list = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Paris today?",
+        }
+    ]
+    stream_response = await client.responses.create(
+        model=model_name,
+        input=input_list,
+        tools=tools,
+        stream=True,
+    )
+    assert stream_response is not None
+    final_tool_calls = {}
+    final_tool_calls_named = {}
+    async for event in stream_response:
+        if event.type == "response.output_item.added":
+            if event.item.type != "function_call":
+                continue
+            final_tool_calls[event.output_index] = event.item
+            final_tool_calls_named[event.item.name] = event.item
+        elif event.type == "response.function_call_arguments.delta":
+            index = event.output_index
+            tool_call = final_tool_calls[index]
+            if tool_call:
+                tool_call.arguments += event.delta
+                final_tool_calls_named[tool_call.name] = tool_call
+        elif event.type == "response.function_call_arguments.done":
+            assert event.arguments == final_tool_calls_named[event.name].arguments
+    for tool_call in final_tool_calls.values():
+        if (
+            tool_call
+            and tool_call.type == "function_call"
+            and tool_call.name == "get_weather"
+        ):
+            args = json.loads(tool_call.arguments)
+            result = call_function(tool_call.name, args)
+            input_list += [tool_call]
+            break
+    assert result is not None
+    response = await client.responses.create(
+        model=model_name,
+        input=input_list
+        + [
+            {
+                "type": "function_call_output",
+                "call_id": tool_call.call_id,
+                "output": str(result),
+            }
+        ],
+        tools=tools,
+        stream=True,
+    )
+    assert response is not None
+    async for event in response:
+        # check that no function call events in the stream
+        assert event.type != "response.function_call_arguments.delta"
+        assert event.type != "response.function_call_arguments.done"
+        # check that the response contains output text
+        if event.type == "response.completed":
+            assert len(event.response.output) > 0
+            assert event.response.output_text is not None
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_output_messages_enabled(client: OpenAI, model_name: str, server):
@@ -23,6 +23,8 @@ from openai.types.responses import (
     ResponseCodeInterpreterToolCallParam,
     ResponseContentPartAddedEvent,
     ResponseContentPartDoneEvent,
+    ResponseFunctionCallArgumentsDeltaEvent,
+    ResponseFunctionCallArgumentsDoneEvent,
     ResponseFunctionToolCall,
     ResponseFunctionWebSearch,
     ResponseOutputItem,
@@ -927,6 +929,11 @@ class OpenAIServingResponses(OpenAIServing):
                 # to add the tool call request to prev_outputs so that the
                 # parse_response_input can find the tool call request when
                 # parsing the tool call output.
+                if (
+                    isinstance(response_msg, dict)
+                    and response_msg.get("type") == "function_call"
+                ):
+                    response_msg = ResponseFunctionToolCall.model_validate(response_msg)
                 if isinstance(response_msg, ResponseFunctionToolCall):
                     prev_outputs.append(response_msg)
         return messages
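To illustrate the input shape this new branch normalizes (field values below are made up): when a client replays its conversation history, the earlier function_call item usually arrives as a plain dict, and validating it into a ResponseFunctionToolCall lets parse_response_input later pair the function_call_output with its originating call.

from openai.types.responses import ResponseFunctionToolCall

# A replayed tool-call request as a client would send it back (values invented).
response_msg = {
    "type": "function_call",
    "name": "get_weather",
    "call_id": "call_123",
    "arguments": '{"latitude": 48.85, "longitude": 2.35}',
}
msg = ResponseFunctionToolCall.model_validate(response_msg)
assert msg.call_id == "call_123" and msg.name == "get_weather"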
@@ -1398,19 +1405,48 @@ class OpenAIServingResponses(OpenAIServing):
         current_output_index = 0
         current_item_id: str = ""
         sent_output_item_added = False
+        is_first_function_call_delta = False

         async for ctx in result_generator:
             assert isinstance(ctx, StreamingHarmonyContext)

             if ctx.is_expecting_start():
                 current_output_index += 1
                 sent_output_item_added = False
+                is_first_function_call_delta = False

                 if len(ctx.parser.messages) > 0:
                     previous_item = ctx.parser.messages[-1]
                     if previous_item.recipient is not None:
-                        # Deal with tool call here
-                        pass
+                        # Deal with tool call
+                        if previous_item.recipient.startswith("functions."):
+                            function_name = previous_item.recipient[len("functions.") :]
+                            yield _increment_sequence_number_and_return(
+                                ResponseFunctionCallArgumentsDoneEvent(
+                                    type="response.function_call_arguments.done",
+                                    arguments=previous_item.content[0].text,
+                                    name=function_name,
+                                    item_id=current_item_id,
+                                    output_index=current_output_index,
+                                    sequence_number=-1,
+                                )
+                            )
+                            function_call_item = ResponseFunctionToolCall(
+                                type="function_call",
+                                arguments=previous_item.content[0].text,
+                                name=function_name,
+                                item_id=current_item_id,
+                                output_index=current_output_index,
+                                sequence_number=-1,
+                                call_id=f"fc_{random_uuid()}",
+                                status="completed",
+                            )
+                            yield _increment_sequence_number_and_return(
+                                ResponseOutputItemDoneEvent(
+                                    type="response.output_item.done",
+                                    sequence_number=-1,
+                                    output_index=current_output_index,
+                                    item=function_call_item,
+                                )
+                            )
                     elif previous_item.channel == "analysis":
                         content = ResponseReasoningTextContent(
                             text=previous_item.content[0].text,
@@ -1766,6 +1802,43 @@ class OpenAIServingResponses(OpenAIServing):
                             ),
                         )
                     )
+                # developer tools will be triggered on the commentary channel
+                # and recipient starts with "functions.TOOL_NAME"
+                if (
+                    ctx.parser.current_channel == "commentary"
+                    and ctx.parser.current_recipient
+                    and ctx.parser.current_recipient.startswith("functions.")
+                ):
+                    if is_first_function_call_delta is False:
+                        is_first_function_call_delta = True
+                        fc_name = ctx.parser.current_recipient[len("functions.") :]
+                        tool_call_item = ResponseFunctionToolCall(
+                            name=fc_name,
+                            type="function_call",
+                            id=current_item_id,
+                            call_id=f"call_{random_uuid()}",
+                            arguments="",
+                            status="in_progress",
+                        )
+                        current_item_id = f"fc_{random_uuid()}"
+                        yield _increment_sequence_number_and_return(
+                            ResponseOutputItemAddedEvent(
+                                type="response.output_item.added",
+                                sequence_number=-1,
+                                output_index=current_output_index,
+                                item=tool_call_item,
+                            )
+                        )
+                    else:
+                        yield _increment_sequence_number_and_return(
+                            ResponseFunctionCallArgumentsDeltaEvent(
+                                item_id=current_item_id,
+                                delta=ctx.parser.last_content_delta,
+                                output_index=current_output_index,
+                                sequence_number=-1,
+                                type="response.function_call_arguments.delta",
+                            )
+                        )

     async def responses_stream_generator(
         self,
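Condensing the control flow added in the last two hunks: the first commentary-channel delta whose recipient starts with "functions." announces the call via response.output_item.added, later deltas stream argument text, and the done event plus the completed output item are emitted once the parser reaches the message boundary. A standalone sketch of that routing decision (the function and its names are illustrative, not vLLM API):

from typing import Optional


def classify_harmony_delta(
    channel: str, recipient: Optional[str], is_first_function_call_delta: bool
) -> Optional[str]:
    """Map one harmony content delta to the Responses event it should trigger."""
    if channel == "commentary" and recipient and recipient.startswith("functions."):
        if is_first_function_call_delta:
            return "response.output_item.added"  # announce the function_call item
        return "response.function_call_arguments.delta"  # stream argument text
    return None  # analysis/final channels are handled by other branches


assert (
    classify_harmony_delta("commentary", "functions.get_weather", True)
    == "response.output_item.added"
)
assert classify_harmony_delta("analysis", None, False) is None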