Mirror of https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Bugfix][Frontend] Eliminate regex based check in reasoning full generator (#14821)
Signed-off-by: Ce Gao <cegao@tensorchord.ai>
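For context, a minimal standalone sketch of the idea behind this change (the constants and function names below are illustrative, not vLLM's): for full, non-streaming generations, the compiled-regex lookup that this commit removes and the str.partition call it switches to produce the same result.

# Minimal sketch, not vLLM code: START/END, strip_start, extract_with_regex and
# extract_with_partition are names invented for this illustration.
import re
from typing import Optional

START, END = "<think>", "</think>"

def strip_start(text: str) -> str:
    # Both paths first drop everything up to and including <think>, if present.
    before, sep, after = text.partition(START)
    return after if sep else before

def extract_with_regex(text: str) -> tuple[str, Optional[str]]:
    # Old path (sketch): re-add <think> and capture the body with a DOTALL regex.
    text = strip_start(text)
    if END not in text:
        return text, None
    text = f"{START}{text}"
    reasoning = re.findall(rf"{re.escape(START)}(.*?){re.escape(END)}", text, re.DOTALL)[0]
    rest = text[len(f"{START}{reasoning}{END}"):]
    return reasoning, rest or None

def extract_with_partition(text: str) -> tuple[str, Optional[str]]:
    # New path (sketch): one str.partition on the end token, no regex at all.
    text = strip_start(text)
    if END not in text:
        return text, None
    reasoning, _, rest = text.partition(END)
    return reasoning, rest or None

for sample in ("<think>abc</think>xyz", "abc</think>xyz", "<think>abc", ""):
    assert extract_with_regex(sample) == extract_with_partition(sample)

One corner the samples above do not exercise: if a <think> appears only after the first </think> (so the start token is not re-added), the regex never matches and findall(...)[0] raises IndexError, while partition still splits cleanly.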
@@ -90,6 +90,40 @@ SHORTEST_REASONING_WITH_THINK = {
     "content": "This is the rest",
     "is_reasoning_end": True,
 }
+THINK_NO_END = {
+    "output": "<think>This is a reasoning section",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY = {
+    "output": "",
+    "reasoning_content": "",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning_content": None,
+    "content": None,
+    "is_reasoning_end": False,
+}
+NEW_LINE = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
+# Streaming cannot handle new lines at the beginning of the output
+# because we need to support <think>...</think> and </think>...
+# We cannot know if the text before <think> is reasoning content
+# or not.
+NEW_LINE_STREAMING = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "\nThis is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}

 TEST_CASES = [
     pytest.param(
@@ -182,6 +216,36 @@ TEST_CASES = [
         SHORTEST_REASONING_WITH_THINK,
         id="shortest_with_think_streaming",
     ),
+    pytest.param(
+        False,
+        THINK_NO_END,
+        id="think_no_end",
+    ),
+    pytest.param(
+        True,
+        THINK_NO_END,
+        id="think_no_end_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
+    pytest.param(
+        False,
+        NEW_LINE,
+        id="new_line",
+    ),
+    pytest.param(
+        True,
+        NEW_LINE_STREAMING,
+        id="new_line_streaming",
+    ),
 ]
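The comment above NEW_LINE_STREAMING is worth a concrete illustration. The toy loop below (the chunking and the loop itself are assumed for illustration; this is not vLLM's streaming parser) shows why the streaming fixture expects the leading "\n" inside reasoning_content while the full-generation fixture does not.

output = "\n<think>This is a reasoning section</think>\nThis is the rest"

# Full-generation view: with the whole string available, everything before
# <think> can simply be dropped.
after_start = output.partition("<think>")[2]
assert after_start.partition("</think>")[0] == "This is a reasoning section"

# Streaming view (toy loop, assumed chunking): the leading "\n" is seen and
# emitted before <think> arrives, so it has already been attributed to
# reasoning and cannot be reclassified afterwards.
chunks = ["\n", "<think>", "This is a reasoning section", "</think>",
          "\nThis is the rest"]
reasoning, seen_end = "", False
for chunk in chunks:
    if chunk == "</think>":
        seen_end = True
    elif chunk != "<think>" and not seen_end:
        reasoning += chunk
assert reasoning == "\nThis is a reasoning section"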
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0

-import re
 from collections.abc import Sequence
 from typing import Optional, Union

@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
     def __init__(self, tokenizer: PreTrainedTokenizerBase):
         super().__init__(tokenizer)

-        self.reasoning_regex = re.compile(
-            rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
-
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ReasoningParser "
@@ -143,23 +139,34 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
     def extract_reasoning_content(
             self, model_output: str, request: ChatCompletionRequest
     ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
+
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+
+        # Check if the start token is present in the model output, remove it
+        # if it is present.
+        model_output_parts = model_output.partition(self.start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
+
         # DeepSeek R1 doesn't generate <think> now.
         # Thus we assume the reasoning content is always at the start.
         # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
         if self.end_token not in model_output:
             return model_output, None
         else:
-            # Add a start token if it's missing to keep compatibility.
-            if self.start_token not in model_output:
-                model_output = f"{self.start_token}{model_output}"
-            # Use a regex to find the reasoning content
-            reasoning_content = self.reasoning_regex.findall(model_output)[0]
-
-            end_index = len(
-                f"{self.start_token}{reasoning_content}{self.end_token}")
-            final_output = model_output[end_index:]
-
-            if len(final_output) == 0:
-                return reasoning_content, None
-
-            return reasoning_content, final_output
+            reasoning_content, _, content = model_output.partition(
+                self.end_token)
+            # If the end token is not found, return the model output as is.
+            # It should not happen since we already checked for the presence
+            # of the end token.
+            # If generation stops right after end-of-think, return null content
+            final_content = content or None
+            return reasoning_content, final_content
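A quick standalone check of the new control flow against the fixtures added above. Illustrative only: extract below re-implements just the extraction logic for this sketch, not the parser class itself (which also takes the tokenizer and request), and the expected tuples mirror the THINK_NO_END, EMPTY and NEW_LINE test data.

from typing import Optional

def extract(model_output: str, start: str = "<think>",
            end: str = "</think>") -> tuple[Optional[str], Optional[str]]:
    # Mirror of the new control flow: strip an optional start token, then
    # partition once on the end token; "or None" maps empty trailing text
    # (generation stopped right after </think>) to null content.
    before, sep, after = model_output.partition(start)
    model_output = after if sep else before
    if end not in model_output:
        return model_output, None
    reasoning, _, content = model_output.partition(end)
    return reasoning, content or None

assert extract("<think>This is a reasoning section") == \
    ("This is a reasoning section", None)                      # THINK_NO_END
assert extract("") == ("", None)                               # EMPTY
assert extract("\n<think>This is a reasoning section</think>"
               "\nThis is the rest") == \
    ("This is a reasoning section", "\nThis is the rest")      # NEW_LINE
assert extract("<think>abc</think>") == ("abc", None)          # nothing after </think>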