Files
vllm/tests/tool_use/test_seed_oss_tool_parser.py
2025-10-12 09:51:31 -07:00

498 lines
25 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa: E501
import json
from collections.abc import Generator
import pytest
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
DeltaMessage,
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"
@pytest.fixture(scope="module")
def seed_oss_tokenizer():
    """Module-scoped fixture returning the tokenizer for ``MODEL``.

    The tokenizer is requested with ``trust_remote_code=True``.
    """
    tokenizer = get_tokenizer(tokenizer_name=MODEL, trust_remote_code=True)
    return tokenizer
@pytest.fixture
def seed_oss_tool_parser(seed_oss_tokenizer):
    """Function-scoped fixture wrapping the tokenizer in a ``SeedOssToolParser``."""
    parser = SeedOssToolParser(seed_oss_tokenizer)
    return parser
@pytest.fixture
def sample_tools():
    """Return the tool list used by the tests: a single ``get_weather`` function."""
    # Schema of the arguments: ``location`` is required, ``unit`` is optional.
    parameters_schema = {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "City and country e.g. Bogotá, Colombia",
            },
            "unit": {
                "type": "string",
                "description": "this is the unit of temperature",
            },
        },
        "required": ["location"],
        "additionalProperties": False,
    }

    # Schema of the declared return value.
    returns_schema = {
        "type": "object",
        "properties": {
            "temperature": {
                "type": "number",
                "description": "temperature in celsius",
            }
        },
        "required": ["temperature"],
        "additionalProperties": False,
    }

    get_weather_tool = ChatCompletionToolsParam(
        type="function",
        function={
            "name": "get_weather",
            "description": "Get current temperature for a given location.",
            "parameters": parameters_schema,
            "returns": returns_schema,
            "strict": True,
        },
    )
    return [get_weather_tool]
def assert_tool_calls(
    actual_tool_calls: list[ToolCall], expected_tool_calls: list[ToolCall]
):
    """Assert that two tool-call lists match pairwise.

    Both lists must have the same length. For each pair, the actual call
    must have type ``"function"`` and its function payload (as a whole, then
    name and arguments individually) must equal the expected one.
    """
    assert len(actual_tool_calls) == len(expected_tool_calls)

    for got, want in zip(actual_tool_calls, expected_tool_calls):
        # Tool call ids are not compared here; only the type and the
        # function payload are checked.
        assert got.type == "function"

        got_function = got.function
        want_function = want.function
        assert got_function == want_function
        assert got_function.name == want_function.name
        assert got_function.arguments == want_function.arguments
def test_extract_tool_calls_no_tools(seed_oss_tool_parser):
    """Plain text yields no tool calls and is returned unchanged as content."""
    plain_text = "This is a test response without any tool calls"

    result = seed_oss_tool_parser.extract_tool_calls(plain_text, request=None)  # type: ignore[arg-type]

    assert not result.tools_called
    assert result.tool_calls == []
    assert result.content == plain_text
@pytest.mark.parametrize(
    ids=[
        "tool_call_0_thinking_budget",
        "tool_call_512_thinking_budget",
        "tool_call_unlimited_thinking_budget",
    ],
    argnames=["model_output", "expected_tool_calls", "expected_content"],
    argvalues=[
        (
            """<seed:tool_call>\n<function=get_weather>\n"""
            """<parameter=location>Barcelona, Spain</parameter>\n</function>\n</seed:tool_call>""",
            [
                ToolCall(
                    function=FunctionCall(
                        name="get_weather",
                        arguments=json.dumps(
                            {
                                "location": "Barcelona, Spain",
                            },
                        ),
                    ),
                    type="function",
                )
            ],
            None,
        ),
        (
            """<seed:think>The user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nLet me analyze the """
            """question. The user wants to know the weather in Barcelona, Spain. Looking at the functions available, """
            """there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nFirst, """
            """check the parameters required by get_weather: location is mandatory (needs city and country), and unit is """
            """optional. The user provided "Barcelona Spain" as the location, which fits the required format (city, """
            """country). \n<seed:cot_budget_reflect>I have used 131 tokens, and there are 381 tokens remaining for use."""
            """</seed:cot_budget_reflect>\n Since the unit isn\'t specified, the function will default to Celsius, which """
            """is fine. \n\nThere\'s no need to ask for more information because the location is clear. So I should call """
            """the get_weather function with location set to "Barcelona, Spain" (adding a comma for clarity, though the """
            """user\'s input has a space, but the function might accept either; to be safe, using the standard format """
            """with a comma).\n<seed:cot_budget_reflect>I have used 257 tokens, and there are 255 tokens remaining for """
            """use.</seed:cot_budget_reflect>\n The unit parameter can be omitted since it\'s optional.</seed:think>\n"""
            """<seed:tool_call>\n<function=get_weather>\n<parameter=location>Barcelona, Spain</parameter>\n</function>"""
            """\n</seed:tool_call>""",
            [
                ToolCall(
                    function=FunctionCall(
                        name="get_weather",
                        arguments=json.dumps(
                            {
                                "location": "Barcelona, Spain",
                            },
                        ),
                    ),
                    type="function",
                )
            ],
            """<seed:think>The user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nLet me analyze the """
            """question. The user wants to know the weather in Barcelona, Spain. Looking at the functions available, """
            """there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nFirst, """
            """check the parameters required by get_weather: location is mandatory (needs city and country), and unit is """
            """optional. The user provided "Barcelona Spain" as the location, which fits the required format (city, """
            """country). \n<seed:cot_budget_reflect>I have used 131 tokens, and there are 381 tokens remaining for use."""
            """</seed:cot_budget_reflect>\n Since the unit isn\'t specified, the function will default to Celsius, which """
            """is fine. \n\nThere\'s no need to ask for more information because the location is clear. So I should call """
            """the get_weather function with location set to "Barcelona, Spain" (adding a comma for clarity, though the """
            """user\'s input has a space, but the function might accept either; to be safe, using the standard format """
            """with a comma).\n<seed:cot_budget_reflect>I have used 257 tokens, and there are 255 tokens remaining for """
            """use.</seed:cot_budget_reflect>\n The unit parameter can be omitted since it\'s optional.</seed:think>\n""",
        ),
        (
            """<seed:think>\nGot it, let\'s see. The user asked for the weather in Barcelona, Spain. """
            """First, I need to remember the function I can use: get_weather. The function requires a """
            """location (city and country) which is "Barcelona, Spain" here, and unit is optional. Since """
            """the user didn\'t specify the unit, the default in the function is Celsius, right? Wait, """
            """let me check the function docstring again. Oh, the function says unit is optional, and """
            """returns temperature in Celsius. So I should call get_weather with location "Barcelona, """
            """Spain" and maybe omit unit or set to Celsius. Let me format the function call correctly. """
            """The format is <seed:tool_call>\n<function=get_weather>\n<parameter=location>Barcelona, """
            """Spain</parameter>\n<parameter=unit>celsius</parameter>\n</function>\n</seed:tool_call>. """
            """Wait, but does the unit parameter accept "celsius"? The docstring says unit is the unit """
            """of temperature, but the return is in Celsius anyway. Maybe even if I don\'t pass unit, """
            """it\'s okay, but to be explicit, maybe pass "celsius". Let me go with that. So the function """
            """call should be as above. Then wait for the result to come back and tell the user the """
            """temperature in Celsius.</seed:think><seed:tool_call>\n<function=get_weather>\n<parameter=location>"""
            """Barcelona, Spain</parameter>\n<parameter=unit>celsius</parameter>\n</function>\n</seed:tool_call>""",
            [
                ToolCall(
                    function=FunctionCall(
                        name="get_weather",
                        arguments=json.dumps(
                            {
                                "location": "Barcelona, Spain",
                                "unit": "celsius",
                            },
                        ),
                    ),
                    type="function",
                )
            ],
            """<seed:think>\nGot it, let\'s see. The user asked for the weather in Barcelona, Spain. """
            """First, I need to remember the function I can use: get_weather. The function requires a """
            """location (city and country) which is "Barcelona, Spain" here, and unit is optional. Since """
            """the user didn\'t specify the unit, the default in the function is Celsius, right? Wait, """
            """let me check the function docstring again. Oh, the function says unit is optional, and """
            """returns temperature in Celsius. So I should call get_weather with location "Barcelona, """
            """Spain" and maybe omit unit or set to Celsius. Let me format the function call correctly. """
            """The format is <seed:tool_call>\n<function=get_weather>\n<parameter=location>Barcelona, """
            """Spain</parameter>\n<parameter=unit>celsius</parameter>\n</function>\n</seed:tool_call>. """
            """Wait, but does the unit parameter accept "celsius"? The docstring says unit is the unit """
            """of temperature, but the return is in Celsius anyway. Maybe even if I don\'t pass unit, """
            """it\'s okay, but to be explicit, maybe pass "celsius". Let me go with that. So the function """
            """call should be as above. Then wait for the result to come back and tell the user the """
            """temperature in Celsius.</seed:think>""",
        ),
    ],
)
def test_extract_tool_calls(
    seed_oss_tool_parser,
    sample_tools,
    model_output,
    expected_tool_calls,
    expected_content,
):
    """Check non-streaming extraction of tool calls from a complete model output.

    Each case supplies the raw model output, the tool calls expected to be
    extracted from it, and the content expected to be left over (``None``
    when the output consists only of the tool call).
    """
    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)
    extracted_tool_calls = seed_oss_tool_parser.extract_tool_calls(
        model_output, request=request
    )  # type: ignore[arg-type]

    assert extracted_tool_calls.tools_called
    assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
    assert extracted_tool_calls.content == expected_content
def test_streaming_tool_calls_no_tools(seed_oss_tool_parser):
    """Streaming a plain-text delta returns that delta as content.

    The previous/delta/current texts are derived from one split of the same
    sentence so that ``previous_text + delta_text == current_text``, which is
    the invariant a real streaming caller maintains.
    """
    previous_text = "This is a test response"
    delta_text = " without any tool calls"
    current_text = previous_text + delta_text

    result = seed_oss_tool_parser.extract_tool_calls_streaming(
        previous_text=previous_text,
        current_text=current_text,
        delta_text=delta_text,
        previous_token_ids=[],
        current_token_ids=[],
        delta_token_ids=[],
        request=None,
    )

    # Should return the delta text as content
    assert result is not None
    assert hasattr(result, "content")
    assert result.content == delta_text
def stream_delta_message_generator(
    seed_oss_tool_parser: SeedOssToolParser,
    seed_oss_tokenizer: AnyTokenizer,
    model_output: str,
    request: ChatCompletionRequest | None = None,
) -> Generator[DeltaMessage, None, None]:
    """Replay ``model_output`` through the streaming parser one token at a time.

    The text is encoded (without special tokens added), and each token is
    detokenized incrementally and passed to ``extract_tool_calls_streaming``
    together with the text and token ids seen so far. Every truthy delta
    message returned by the parser is yielded.
    """
    token_ids = seed_oss_tokenizer.encode(model_output, add_special_tokens=False)

    # Incremental detokenization state carried from one step to the next.
    text_so_far = ""
    decoded_tokens = None
    prefix_offset = 0
    read_offset = 0

    for end in range(1, len(token_ids) + 1):
        seen_ids = token_ids[: end - 1]
        current_ids = token_ids[:end]
        newest_ids = [token_ids[end - 1]]

        step = detokenize_incrementally(
            tokenizer=seed_oss_tokenizer,
            all_input_ids=current_ids,
            prev_tokens=decoded_tokens,
            prefix_offset=prefix_offset,
            read_offset=read_offset,
            skip_special_tokens=False,
            spaces_between_special_tokens=True,
        )
        new_tokens, delta_text, next_prefix_offset, next_read_offset = step

        extended_text = text_so_far + delta_text

        delta_message = seed_oss_tool_parser.extract_tool_calls_streaming(
            text_so_far,
            extended_text,
            delta_text,
            seen_ids,
            current_ids,
            newest_ids,
            request=request,
        )
        if delta_message:
            yield delta_message

        # Advance the state for the next token.
        text_so_far = extended_text
        if decoded_tokens:
            decoded_tokens = decoded_tokens + new_tokens
        else:
            decoded_tokens = new_tokens
        prefix_offset = next_prefix_offset
        read_offset = next_read_offset
@pytest.mark.parametrize(
    ids=[
        "tool_call_0_thinking_budget",
        "tool_call_512_thinking_budget",
        "tool_call_unlimited_thinking_budget",
    ],
    argnames=["model_output", "expected_tool_calls", "expected_content"],
    argvalues=[
        (
            """<seed:think>\n</seed:cot_budget_reflect>\n</seed:cot_budget_reflect>\n"""
            """The current thinking budget is 0, so I will directly start answering the question.\n</seed:think>\n"""
            """<seed:tool_call>\n<function=get_weather>\n"""
            """<parameter=location>Barcelona, Spain</parameter>\n</function>\n</seed:tool_call>""",
            [
                ToolCall(
                    function=FunctionCall(
                        name="get_weather",
                        arguments=json.dumps(
                            {
                                "location": "Barcelona, Spain",
                            },
                        ),
                    ),
                    type="function",
                )
            ],
            """<seed:think>\n</seed:cot_budget_reflect>\n</seed:cot_budget_reflect>\n"""
            """The current thinking budget is 0, so I will directly start answering the question.\n</seed:think>\n""",
        ),
        (
            """<seed:think>The user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nLet me analyze the """
            """question. The user wants to know the weather in Barcelona, Spain. Looking at the functions available, """
            """there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nFirst, """
            """check the parameters required by get_weather: location is mandatory (needs city and country), and unit is """
            """optional. The user provided "Barcelona Spain" as the location, which fits the required format (city, """
            """country). \n<seed:cot_budget_reflect>I have used 131 tokens, and there are 381 tokens remaining for use."""
            """</seed:cot_budget_reflect>\n Since the unit isn\'t specified, the function will default to Celsius, which """
            """is fine. \n\nThere\'s no need to ask for more information because the location is clear. So I should call """
            """the get_weather function with location set to "Barcelona, Spain" (adding a comma for clarity, though the """
            """user\'s input has a space, but the function might accept either; to be safe, using the standard format """
            """with a comma).\n<seed:cot_budget_reflect>I have used 257 tokens, and there are 255 tokens remaining for """
            """use.</seed:cot_budget_reflect>\n The unit parameter can be omitted since it\'s optional.</seed:think>\n"""
            """<seed:tool_call>\n<function=get_weather>\n<parameter=location>Barcelona, Spain</parameter>\n</function>"""
            """\n</seed:tool_call>""",
            [
                ToolCall(
                    function=FunctionCall(
                        name="get_weather",
                        arguments=json.dumps(
                            {
                                "location": "Barcelona, Spain",
                            },
                        ),
                    ),
                    type="function",
                )
            ],
            """<seed:think>The user\'s current thinking budget is 512.</seed:cot_budget_reflect>\nLet me analyze the """
            """question. The user wants to know the weather in Barcelona, Spain. Looking at the functions available, """
            """there\'s a get_weather function that can retrieve the current temperature for a given location. \n\nFirst, """
            """check the parameters required by get_weather: location is mandatory (needs city and country), and unit is """
            """optional. The user provided "Barcelona Spain" as the location, which fits the required format (city, """
            """country). \n<seed:cot_budget_reflect>I have used 131 tokens, and there are 381 tokens remaining for use."""
            """</seed:cot_budget_reflect>\n Since the unit isn\'t specified, the function will default to Celsius, which """
            """is fine. \n\nThere\'s no need to ask for more information because the location is clear. So I should call """
            """the get_weather function with location set to "Barcelona, Spain" (adding a comma for clarity, though the """
            """user\'s input has a space, but the function might accept either; to be safe, using the standard format """
            """with a comma).\n<seed:cot_budget_reflect>I have used 257 tokens, and there are 255 tokens remaining for """
            """use.</seed:cot_budget_reflect>\n The unit parameter can be omitted since it\'s optional.</seed:think>\n""",
        ),
        (
            """<seed:think>\nGot it, let\'s see. The user asked for the weather in Barcelona, Spain. """
            """First, I need to remember the function I can use: get_weather. The function requires a """
            """location (city and country) which is "Barcelona, Spain" here, and unit is optional. Since """
            """the user didn\'t specify the unit, the default in the function is Celsius, right? Wait, """
            """let me check the function docstring again. Oh, the function says unit is optional, and """
            """returns temperature in Celsius. So I should call get_weather with location "Barcelona, """
            """Spain" and maybe omit unit or set to Celsius. Let me format the function call correctly. """
            """The format is <seed:tool_call>\n<function=get_weather>\n<parameter=location>Barcelona, """
            """Spain</parameter>\n<parameter=unit>celsius</parameter>\n</function>\n</seed:tool_call>. """
            """Wait, but does the unit parameter accept "celsius"? The docstring says unit is the unit """
            """of temperature, but the return is in Celsius anyway. Maybe even if I don\'t pass unit, """
            """it\'s okay, but to be explicit, maybe pass "celsius". Let me go with that. So the function """
            """call should be as above. Then wait for the result to come back and tell the user the """
            """temperature in Celsius.</seed:think><seed:tool_call>\n<function=get_weather>\n<parameter=location>"""
            """Barcelona, Spain</parameter>\n<parameter=unit>celsius</parameter>\n</function>\n</seed:tool_call>""",
            [
                ToolCall(
                    function=FunctionCall(
                        name="get_weather",
                        arguments=json.dumps(
                            {
                                "location": "Barcelona, Spain",
                                "unit": "celsius",
                            },
                        ),
                    ),
                    type="function",
                )
            ],
            """<seed:think>\nGot it, let\'s see. The user asked for the weather in Barcelona, Spain. """
            """First, I need to remember the function I can use: get_weather. The function requires a """
            """location (city and country) which is "Barcelona, Spain" here, and unit is optional. Since """
            """the user didn\'t specify the unit, the default in the function is Celsius, right? Wait, """
            """let me check the function docstring again. Oh, the function says unit is optional, and """
            """returns temperature in Celsius. So I should call get_weather with location "Barcelona, """
            """Spain" and maybe omit unit or set to Celsius. Let me format the function call correctly. """
            """The format is <seed:tool_call>\n<function=get_weather>\n<parameter=location>Barcelona, """
            """Spain</parameter>\n<parameter=unit>celsius</parameter>\n</function>\n</seed:tool_call>. """
            """Wait, but does the unit parameter accept "celsius"? The docstring says unit is the unit """
            """of temperature, but the return is in Celsius anyway. Maybe even if I don\'t pass unit, """
            """it\'s okay, but to be explicit, maybe pass "celsius". Let me go with that. So the function """
            """call should be as above. Then wait for the result to come back and tell the user the """
            """temperature in Celsius.</seed:think>""",
        ),
    ],
)
def test_streaming_tool_calls(
    seed_oss_tool_parser,
    seed_oss_tokenizer,
    sample_tools,
    model_output,
    expected_tool_calls,
    expected_content,
):
    """Test incremental streaming behavior.

    The model output is replayed token by token through the streaming parser.
    Content deltas are concatenated and tool-call deltas are accumulated per
    tool index; the reassembled content and tool calls are then compared with
    the expectations of the case.
    """
    request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools)

    other_content = ""
    tool_states = {}  # Track state per tool index

    for delta_message in stream_delta_message_generator(
        seed_oss_tool_parser, seed_oss_tokenizer, model_output, request
    ):
        # role should never be streamed from tool parser
        assert not delta_message.role

        if delta_message.content:
            other_content += delta_message.content

        if delta_message.tool_calls:
            for tool_call in delta_message.tool_calls:
                idx = tool_call.index

                # Initialize state for new tool
                if idx not in tool_states:
                    tool_states[idx] = {
                        "id": None,
                        "name": None,
                        "arguments": "",
                        "type": None,
                    }

                # First chunk should have id, name, and type
                if tool_call.id:
                    tool_states[idx]["id"] = tool_call.id

                if tool_call.type:
                    assert tool_call.type == "function"
                    tool_states[idx]["type"] = tool_call.type

                if tool_call.function:
                    if tool_call.function.name:
                        # Should only be set once
                        assert tool_states[idx]["name"] is None
                        tool_states[idx]["name"] = tool_call.function.name

                    if tool_call.function.arguments is not None:
                        # Accumulate arguments incrementally
                        tool_states[idx]["arguments"] += tool_call.function.arguments

    # Verify final content
    assert other_content == expected_content

    # Verify we got all expected tool calls
    assert len(tool_states) == len(expected_tool_calls)

    # Verify each tool call
    for idx, expected_tool in enumerate(expected_tool_calls):
        state = tool_states[idx]
        assert state["id"] is not None
        assert state["type"] == "function"
        assert state["name"] == expected_tool.function.name

        # Parse accumulated arguments; compare as JSON so that formatting
        # differences in the streamed string do not matter.
        arguments_str = state["arguments"]
        assert arguments_str is not None
        actual_args = json.loads(arguments_str)
        expected_args = json.loads(expected_tool.function.arguments)
        assert actual_args == expected_args