[Feature][Responses API] Stream Function Call - harmony (#24317)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
Author: Chauncey
Date: 2025-10-14 23:31:43 +08:00 (committed by GitHub)
parent 720394de43
commit df850c4912
2 changed files with 213 additions and 70 deletions


@@ -16,6 +16,22 @@ from ...utils import RemoteOpenAIServer
 
 MODEL_NAME = "openai/gpt-oss-20b"
 
+GET_WEATHER_SCHEMA = {
+    "type": "function",
+    "name": "get_weather",
+    "description": "Get current temperature for provided coordinates in celsius.",  # noqa
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "latitude": {"type": "number"},
+            "longitude": {"type": "number"},
+        },
+        "required": ["latitude", "longitude"],
+        "additionalProperties": False,
+    },
+    "strict": True,
+}
+
 
 @pytest.fixture(scope="module")
 def server():
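The "strict": True flag requests schema-constrained arguments per the OpenAI function-calling convention, so the model's output should always parse against GET_WEATHER_SCHEMA. For reference, a well-formed arguments payload for this tool looks like the following (coordinate values are illustrative):

    import json

    # arguments the model is expected to emit for get_weather,
    # matching GET_WEATHER_SCHEMA above
    args = json.loads('{"latitude": 48.8566, "longitude": 2.3522}')
    assert set(args) == {"latitude", "longitude"}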
@@ -305,6 +321,54 @@ async def test_streaming_types(client: OpenAI, model_name: str):
     assert len(stack_of_event_types) == 0
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling_with_streaming_types(client: OpenAI, model_name: str):
+    # this links the "done" type with the "start" type
+    # so every "done" type should have a corresponding "start" type
+    # and every open block should be closed by the end of the stream
+    pairs_of_event_types = {
+        "response.completed": "response.created",
+        "response.output_item.done": "response.output_item.added",
+        "response.output_text.done": "response.output_text.delta",
+        "response.reasoning_text.done": "response.reasoning_text.delta",
+        "response.reasoning_part.done": "response.reasoning_part.added",
+        "response.function_call_arguments.done": "response.function_call_arguments.delta",  # noqa
+    }
+    tools = [GET_WEATHER_SCHEMA]
+    input_list = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Paris today?",
+        }
+    ]
+    stream_response = await client.responses.create(
+        model=model_name,
+        input=input_list,
+        tools=tools,
+        stream=True,
+    )
+    stack_of_event_types = []
+    async for event in stream_response:
+        if event.type == "response.created":
+            stack_of_event_types.append(event.type)
+        elif event.type == "response.completed":
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+        if event.type.endswith("added"):
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("delta"):
+            if stack_of_event_types[-1] == event.type:
+                continue
+            stack_of_event_types.append(event.type)
+        elif event.type.endswith("done"):
+            assert stack_of_event_types[-1] == pairs_of_event_types[event.type]
+            stack_of_event_types.pop()
+    assert len(stack_of_event_types) == 0
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 @pytest.mark.parametrize("background", [True, False])
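For orientation, the pairing checked by test_function_calling_with_streaming_types above implies a nested event sequence like the following for a single streamed function call (the exact reasoning events depend on the model's output):

    response.created
      response.output_item.added               (item.type == "function_call")
        response.function_call_arguments.delta (repeated)
        response.function_call_arguments.done
      response.output_item.done
    response.completed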
@@ -483,23 +547,7 @@ def call_function(name, args):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]
     response = await client.responses.create(
         model=model_name,
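The call_function helper named in the hunk header above is defined earlier in the test file and is not part of this diff; a minimal sketch consistent with how the tests invoke it (the get_weather stub and its return value are assumptions):

    def get_weather(latitude, longitude):
        # stand-in for a real weather lookup; returns a temperature in celsius
        return 20.0


    def call_function(name, args):
        # dispatch a tool call by name; these tests only use get_weather
        if name == "get_weather":
            return get_weather(**args)
        raise ValueError(f"unknown function: {name}")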
@@ -565,21 +613,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
             },
             "strict": True,
         },
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        },
+        GET_WEATHER_SCHEMA,
     ]
     response = await client.responses.create(
@@ -643,23 +677,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling_required(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]
     with pytest.raises(BadRequestError):
         await client.responses.create(
@@ -689,23 +707,7 @@ async def test_system_message_with_tools(client: OpenAI, model_name: str):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_function_calling_full_history(client: OpenAI, model_name: str):
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get current temperature for provided coordinates in celsius.",  # noqa
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "latitude": {"type": "number"},
-                    "longitude": {"type": "number"},
-                },
-                "required": ["latitude", "longitude"],
-                "additionalProperties": False,
-            },
-            "strict": True,
-        }
-    ]
+    tools = [GET_WEATHER_SCHEMA]
     input_messages = [
         {"role": "user", "content": "What's the weather like in Paris today?"}
@@ -745,6 +747,74 @@ async def test_function_calling_full_history(client: OpenAI, model_name: str):
     assert response_2.output_text is not None
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+async def test_function_calling_with_stream(client: OpenAI, model_name: str):
+    tools = [GET_WEATHER_SCHEMA]
+    input_list = [
+        {
+            "role": "user",
+            "content": "What's the weather like in Paris today?",
+        }
+    ]
+    stream_response = await client.responses.create(
+        model=model_name,
+        input=input_list,
+        tools=tools,
+        stream=True,
+    )
+    assert stream_response is not None
+    final_tool_calls = {}
+    final_tool_calls_named = {}
+    async for event in stream_response:
+        if event.type == "response.output_item.added":
+            if event.item.type != "function_call":
+                continue
+            final_tool_calls[event.output_index] = event.item
+            final_tool_calls_named[event.item.name] = event.item
+        elif event.type == "response.function_call_arguments.delta":
+            index = event.output_index
+            tool_call = final_tool_calls[index]
+            if tool_call:
+                tool_call.arguments += event.delta
+                final_tool_calls_named[tool_call.name] = tool_call
+        elif event.type == "response.function_call_arguments.done":
+            assert event.arguments == final_tool_calls_named[event.name].arguments
+    for tool_call in final_tool_calls.values():
+        if (
+            tool_call
+            and tool_call.type == "function_call"
+            and tool_call.name == "get_weather"
+        ):
+            args = json.loads(tool_call.arguments)
+            result = call_function(tool_call.name, args)
+            input_list += [tool_call]
+            break
+    assert result is not None
+    response = await client.responses.create(
+        model=model_name,
+        input=input_list
+        + [
+            {
+                "type": "function_call_output",
+                "call_id": tool_call.call_id,
+                "output": str(result),
+            }
+        ],
+        tools=tools,
+        stream=True,
+    )
+    assert response is not None
+    async for event in response:
+        # check that there are no function call events in the stream
+        assert event.type != "response.function_call_arguments.delta"
+        assert event.type != "response.function_call_arguments.done"
+        # check that the response contains output text
+        if event.type == "response.completed":
+            assert len(event.response.output) > 0
+            assert event.response.output_text is not None
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_output_messages_enabled(client: OpenAI, model_name: str, server):


@@ -23,6 +23,8 @@ from openai.types.responses import (
     ResponseCodeInterpreterToolCallParam,
     ResponseContentPartAddedEvent,
     ResponseContentPartDoneEvent,
+    ResponseFunctionCallArgumentsDeltaEvent,
+    ResponseFunctionCallArgumentsDoneEvent,
     ResponseFunctionToolCall,
     ResponseFunctionWebSearch,
     ResponseOutputItem,
@@ -927,6 +929,11 @@ class OpenAIServingResponses(OpenAIServing):
             # to add the tool call request to prev_outputs so that the
             # parse_response_input can find the tool call request when
             # parsing the tool call output.
+            if (
+                isinstance(response_msg, dict)
+                and response_msg.get("type") == "function_call"
+            ):
+                response_msg = ResponseFunctionToolCall.model_validate(response_msg)
             if isinstance(response_msg, ResponseFunctionToolCall):
                 prev_outputs.append(response_msg)
         return messages
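This guards the case where a client replays a prior function_call item as conversation input: over the wire it arrives as a plain dict rather than a typed object. A rough illustration of the payload being promoted (field values hypothetical):

    replayed_item = {
        "type": "function_call",
        "call_id": "call_123",
        "name": "get_weather",
        "arguments": '{"latitude": 48.85, "longitude": 2.35}',
    }
    # model_validate turns the raw dict into the typed object that
    # parse_response_input expects to find in prev_outputs
    tool_call = ResponseFunctionToolCall.model_validate(replayed_item)
    assert tool_call.name == "get_weather"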
@@ -1398,19 +1405,48 @@
         current_output_index = 0
         current_item_id: str = ""
         sent_output_item_added = False
+        is_first_function_call_delta = False
 
         async for ctx in result_generator:
             assert isinstance(ctx, StreamingHarmonyContext)
 
             if ctx.is_expecting_start():
                 current_output_index += 1
                 sent_output_item_added = False
+                is_first_function_call_delta = False
                 if len(ctx.parser.messages) > 0:
                     previous_item = ctx.parser.messages[-1]
                     if previous_item.recipient is not None:
-                        # Deal with tool call here
-                        pass
+                        # Deal with tool call
+                        if previous_item.recipient.startswith("functions."):
+                            function_name = previous_item.recipient[len("functions.") :]
+                            yield _increment_sequence_number_and_return(
+                                ResponseFunctionCallArgumentsDoneEvent(
+                                    type="response.function_call_arguments.done",
+                                    arguments=previous_item.content[0].text,
+                                    name=function_name,
+                                    item_id=current_item_id,
+                                    output_index=current_output_index,
+                                    sequence_number=-1,
+                                )
+                            )
+                            function_call_item = ResponseFunctionToolCall(
+                                type="function_call",
+                                arguments=previous_item.content[0].text,
+                                name=function_name,
+                                item_id=current_item_id,
+                                output_index=current_output_index,
+                                sequence_number=-1,
+                                call_id=f"fc_{random_uuid()}",
+                                status="completed",
+                            )
+                            yield _increment_sequence_number_and_return(
+                                ResponseOutputItemDoneEvent(
+                                    type="response.output_item.done",
+                                    sequence_number=-1,
+                                    output_index=current_output_index,
+                                    item=function_call_item,
+                                )
+                            )
                     elif previous_item.channel == "analysis":
                         content = ResponseReasoningTextContent(
                             text=previous_item.content[0].text,
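In harmony output, a message addressed to a developer tool carries the tool name in its recipient and the JSON arguments in its text content; a sketch of the fields this branch reads (values hypothetical):

    # previous_item, as produced by the harmony parser (illustrative):
    #   recipient       -> "functions.get_weather"
    #   content[0].text -> '{"latitude": 48.85, "longitude": 2.35}'
    recipient = "functions.get_weather"
    function_name = recipient[len("functions.") :]
    assert function_name == "get_weather"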
@@ -1766,6 +1802,43 @@
                         ),
                     )
                 )
+                # developer tools will be triggered on the commentary channel
+                # and recipient starts with "functions.TOOL_NAME"
+                if (
+                    ctx.parser.current_channel == "commentary"
+                    and ctx.parser.current_recipient
+                    and ctx.parser.current_recipient.startswith("functions.")
+                ):
+                    if is_first_function_call_delta is False:
+                        is_first_function_call_delta = True
+                        fc_name = ctx.parser.current_recipient[len("functions.") :]
+                        tool_call_item = ResponseFunctionToolCall(
+                            name=fc_name,
+                            type="function_call",
+                            id=current_item_id,
+                            call_id=f"call_{random_uuid()}",
+                            arguments="",
+                            status="in_progress",
+                        )
+                        current_item_id = f"fc_{random_uuid()}"
+                        yield _increment_sequence_number_and_return(
+                            ResponseOutputItemAddedEvent(
+                                type="response.output_item.added",
+                                sequence_number=-1,
+                                output_index=current_output_index,
+                                item=tool_call_item,
+                            )
+                        )
+                    else:
+                        yield _increment_sequence_number_and_return(
+                            ResponseFunctionCallArgumentsDeltaEvent(
+                                item_id=current_item_id,
+                                delta=ctx.parser.last_content_delta,
+                                output_index=current_output_index,
+                                sequence_number=-1,
+                                type="response.function_call_arguments.delta",
+                            )
+                        )
 
     async def responses_stream_generator(
         self,
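Taken together with the done events emitted in the is_expecting_start branch, a minimal client loop that reassembles streamed arguments might look like this (a sketch against the event types this PR emits; error handling omitted, helper name assumed):

    import json


    async def collect_function_calls(stream):
        # output_index -> {"name": ..., "arguments": accumulated string}
        calls = {}
        async for event in stream:
            if (
                event.type == "response.output_item.added"
                and event.item.type == "function_call"
            ):
                calls[event.output_index] = {"name": event.item.name, "arguments": ""}
            elif event.type == "response.function_call_arguments.delta":
                calls[event.output_index]["arguments"] += event.delta
            elif event.type == "response.function_call_arguments.done":
                # the final payload should equal what the deltas accumulated,
                # which is exactly what the new test asserts
                assert event.arguments == calls[event.output_index]["arguments"]
        return {
            index: (call["name"], json.loads(call["arguments"]))
            for index, call in calls.items()
        }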