mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[BugFix] Fix tool call finish reason in streaming case (#9209)
Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
This commit is contained in:
committed by
GitHub
parent
d11b46f3a5
commit
ec10cb8511
@ -538,10 +538,12 @@ class OpenAIServingChat(OpenAIServing):
|
||||
# any tokens that were generated but previously
|
||||
# matched by partial json parsing
|
||||
# only happens if we are NOT using guided decoding
|
||||
auto_tools_called = False
|
||||
if tool_parser:
|
||||
index = len(
|
||||
tool_parser.prev_tool_call_arr) - 1 if len(
|
||||
tool_parser.prev_tool_call_arr) > 0 else 0
|
||||
auto_tools_called = len(
|
||||
tool_parser.prev_tool_call_arr) > 0
|
||||
index = len(tool_parser.prev_tool_call_arr
|
||||
) - 1 if auto_tools_called else 0
|
||||
else:
|
||||
index = 0
|
||||
|
||||
@ -576,9 +578,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
delta=delta_message,
|
||||
logprobs=logprobs,
|
||||
finish_reason=output.finish_reason
|
||||
if not (tool_parser
|
||||
and len(tool_parser.prev_tool_call_arr))
|
||||
else "tool_calls",
|
||||
if not auto_tools_called else "tool_calls",
|
||||
stop_reason=output.stop_reason)
|
||||
chunk = ChatCompletionStreamResponse(
|
||||
id=request_id,
|
||||
@ -680,8 +680,10 @@ class OpenAIServingChat(OpenAIServing):
|
||||
else:
|
||||
logprobs = None
|
||||
|
||||
# by default, tools are not used.
|
||||
tools_called = False
|
||||
# In the OpenAI API the finish_reason is "tools_called"
|
||||
# if the tool choice is auto and the model produced a tool
|
||||
# call. The same is not true for named function calls
|
||||
auto_tools_called = False
|
||||
|
||||
# if auto tools are not enabled, and a named tool choice using
|
||||
# outlines is not being used
|
||||
@ -703,7 +705,6 @@ class OpenAIServingChat(OpenAIServing):
|
||||
name=request.tool_choice.function.name,
|
||||
arguments=output.text))
|
||||
])
|
||||
tools_called = True
|
||||
|
||||
# if the request doesn't use tool choice
|
||||
# OR specifies to not use a tool
|
||||
@ -725,7 +726,10 @@ class OpenAIServingChat(OpenAIServing):
|
||||
|
||||
tool_call_info = tool_parser.extract_tool_calls(
|
||||
output.text, request=request)
|
||||
tools_called = tool_call_info.tools_called
|
||||
# In the OpenAI API the finish_reason is "tools_called"
|
||||
# if the tool choice is auto and the model produced a tool
|
||||
# call. The same is not true for named function calls
|
||||
auto_tools_called = tool_call_info.tools_called
|
||||
if tool_call_info.tools_called:
|
||||
message = ChatMessage(role=role,
|
||||
content=tool_call_info.content,
|
||||
@ -748,7 +752,7 @@ class OpenAIServingChat(OpenAIServing):
|
||||
index=output.index,
|
||||
message=message,
|
||||
logprobs=logprobs,
|
||||
finish_reason="tool_calls" if tools_called else
|
||||
finish_reason="tool_calls" if auto_tools_called else
|
||||
output.finish_reason if output.finish_reason else "stop",
|
||||
stop_reason=output.stop_reason)
|
||||
choices.append(choice_data)
|
||||
|
Reference in New Issue
Block a user