From 9354660036dff11a81433f0695c71dfee75cce50 Mon Sep 17 00:00:00 2001 From: Zhikaiiii <55917203+Zhikaiiii@users.noreply.github.com> Date: Wed, 15 Oct 2025 09:50:30 +0800 Subject: [PATCH] [Bugfix]fix Qwen3 xml tool parser (#26345) Signed-off-by: Zhikaiiii <1658973216@qq.com> --- tests/tool_use/test_qwen3coder_tool_parser.py | 88 ++++++++++++- .../tool_parsers/qwen3xml_tool_parser.py | 117 ++++++++++++++---- 2 files changed, 179 insertions(+), 26 deletions(-) diff --git a/tests/tool_use/test_qwen3coder_tool_parser.py b/tests/tool_use/test_qwen3coder_tool_parser.py index b4f0989b1b..93ef1049fc 100644 --- a/tests/tool_use/test_qwen3coder_tool_parser.py +++ b/tests/tool_use/test_qwen3coder_tool_parser.py @@ -40,7 +40,7 @@ def qwen3_xml_tool_parser(qwen3_tokenizer): return Qwen3XMLToolParser(qwen3_tokenizer) -@pytest.fixture(params=["original", "xml"]) +@pytest.fixture(params=["xml"]) def qwen3_tool_parser_parametrized(qwen3_tool_parser, qwen3_xml_tool_parser, request): """Parameterized fixture that provides both parser types for testing""" if request.param == "original": @@ -664,6 +664,9 @@ def test_extract_tool_calls_streaming( # Verify we got all expected tool calls assert len(tool_states) == len(expected_tool_calls) + assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == len( + expected_tool_calls + ) # Verify each tool call for idx, expected_tool in enumerate(expected_tool_calls): @@ -780,9 +783,10 @@ fahrenheit # Verify content was streamed assert "Let me check the weather for you:" in other_content - # Verify we got the tool call assert len(tool_states) == 1 + assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == 1 + state = tool_states[0] assert state["id"] is not None assert state["type"] == "function" @@ -892,3 +896,83 @@ def test_extract_tool_calls_complex_type_with_single_quote( args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) assert args["obj_param"] == {"key": "value"} + + +def test_extract_tool_calls_streaming_missing_opening_tag( + qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools +): + """Test streaming with missing opening tag + + This tests that the streaming parser correctly handles + tool calls that start directly with + """ + model_output = """I'll check the weather for you. + + + +Dallas + + +TX + + +fahrenheit + + +""" + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) + + other_content = "" + tool_states = {} + + for delta_message in stream_delta_message_generator( + qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, request + ): + if delta_message.content: + other_content += delta_message.content + + if delta_message.tool_calls: + for tool_call in delta_message.tool_calls: + idx = tool_call.index + + if idx not in tool_states: + tool_states[idx] = { + "id": None, + "name": None, + "arguments": "", + "type": None, + } + + if tool_call.id: + tool_states[idx]["id"] = tool_call.id + + if tool_call.type: + assert tool_call.type == "function" + tool_states[idx]["type"] = tool_call.type + + if tool_call.function: + if tool_call.function.name: + tool_states[idx]["name"] = tool_call.function.name + + if tool_call.function.arguments is not None: + tool_states[idx]["arguments"] += tool_call.function.arguments + + # Verify content was streamed + assert "I'll check the weather for you." in other_content + + # Verify we got the tool call + assert len(tool_states) == 1 + assert len(qwen3_tool_parser_parametrized.prev_tool_call_arr) == 1 + + state = tool_states[0] + assert state["id"] is not None + assert state["type"] == "function" + assert state["name"] == "get_current_weather" + + # Verify arguments were parsed correctly despite missing opening tag + assert state["arguments"] is not None + args = json.loads(state["arguments"]) + assert args["city"] == "Dallas" + assert args["state"] == "TX" + assert args["unit"] == "fahrenheit" diff --git a/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py index 2c5b0b6a85..9964d1ac25 100644 --- a/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py @@ -2,13 +2,13 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import ast import json -import uuid from collections.abc import Sequence from typing import Any from xml.parsers.expat import ParserCreate import regex as re +from vllm.entrypoints.chat_utils import make_tool_call_id from vllm.entrypoints.openai.protocol import ( ChatCompletionRequest, ChatCompletionToolsParam, @@ -375,14 +375,21 @@ class StreamingXMLToolCallParser: return buffer[: tag_end2 + 1], start_pos + tag_end2 + 1 else: # If currently not parsing tool calls (entering a tool_call), - # check if starts with + # check if starts with or if buffer == ""[: len(buffer)]: # Might be start of , wait for more data return None, start_pos + elif ( + buffer.startswith(" str | None: """Extract function name from various formats""" if attrs and "name" in attrs: @@ -1168,6 +1171,10 @@ class Qwen3XMLToolParser(ToolParser): super().__init__(tokenizer) self.parser = StreamingXMLToolCallParser() + # Add missing attributes for compatibility with serving_chat.py + self.prev_tool_call_arr: list[dict] = [] + self.streamed_args_for_tool: list[str] = [] + logger.info( "vLLM Successfully import tool parser %s !", self.__class__.__name__ ) @@ -1178,6 +1185,9 @@ class Qwen3XMLToolParser(ToolParser): request: ChatCompletionRequest, ) -> ExtractedToolCallInformation: self.parser.reset_streaming_state() + # Reset tool call tracking arrays for new extraction + self.prev_tool_call_arr = [] + self.streamed_args_for_tool = [] if request: self.parser.set_tools(request.tools) result = self.parser.parse_single_streaming_chunks(model_output) @@ -1201,6 +1211,34 @@ class Qwen3XMLToolParser(ToolParser): ), ) ) + + # Update tool call tracking arrays for compatibility + tool_index = ( + tool_call.index + if tool_call.index is not None + else len(self.prev_tool_call_arr) - 1 + ) + + # Ensure we have enough entries in our tracking arrays + while len(self.prev_tool_call_arr) <= tool_index: + self.prev_tool_call_arr.append({"name": "", "arguments": ""}) + while len(self.streamed_args_for_tool) <= tool_index: + self.streamed_args_for_tool.append("") + + # Update tool call information + self.prev_tool_call_arr[tool_index]["name"] = ( + tool_call.function.name + ) + self.prev_tool_call_arr[tool_index]["arguments"] = ( + tool_call.function.arguments + ) + + # Update streamed arguments + if tool_call.function.arguments: + self.streamed_args_for_tool[tool_index] = ( + tool_call.function.arguments + ) + return ExtractedToolCallInformation( tool_calls=tool_calls, tools_called=len(tool_calls) > 0, @@ -1219,6 +1257,9 @@ class Qwen3XMLToolParser(ToolParser): ) -> DeltaMessage | None: if not previous_text: self.parser.reset_streaming_state() + # Reset tool call tracking arrays for new streaming session + self.prev_tool_call_arr = [] + self.streamed_args_for_tool = [] if request: self.parser.set_tools(request.tools) @@ -1230,20 +1271,48 @@ class Qwen3XMLToolParser(ToolParser): open_calls = current_text.count( self.parser.tool_call_start_token ) - current_text.count(self.parser.tool_call_end_token) - if open_calls == 0 and self.parser.tool_call_index > 0: - # If current_call_id is None, use last_completed_call_id - call_id = ( - self.parser.current_call_id or self.parser.last_completed_call_id - ) - return DeltaMessage( - tool_calls=[ - DeltaToolCall( - index=self.parser.tool_call_index - 1, - id=call_id, - function=DeltaFunctionCall(arguments=""), - type="function", - ) - ] - ) + if ( + open_calls == 0 + and self.parser.tool_call_index > 0 + or not self.parser.tool_call_index + and current_text + ): + return DeltaMessage(content="") + return None - return self.parser.parse_single_streaming_chunks(delta_text) + # Parse the delta text and get the result + result = self.parser.parse_single_streaming_chunks(delta_text) + + # Update tool call tracking arrays based on incremental parsing results + if result and result.tool_calls: + for tool_call in result.tool_calls: + if tool_call.function: + tool_index = ( + tool_call.index + if tool_call.index is not None + else len(self.prev_tool_call_arr) - 1 + ) + + # Ensure we have enough entries in our tracking arrays + while len(self.prev_tool_call_arr) <= tool_index: + self.prev_tool_call_arr.append({"name": "", "arguments": ""}) + while len(self.streamed_args_for_tool) <= tool_index: + self.streamed_args_for_tool.append("") + + # Update tool name if provided + if tool_call.function.name: + self.prev_tool_call_arr[tool_index]["name"] = ( + tool_call.function.name + ) + + # Update arguments incrementally + if tool_call.function.arguments is not None: + # Concatenate the incremental arguments + # to the existing streamed arguments + self.prev_tool_call_arr[tool_index]["arguments"] += ( + tool_call.function.arguments + ) + self.streamed_args_for_tool[tool_index] += ( + tool_call.function.arguments + ) + return result