diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 7b6af183a86a..1b669c8fd2fb 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -90,6 +90,40 @@ def deepseek_r1_qwen_tokenizer():
     "content": "This is the rest",
     "is_reasoning_end": True,
 }
+THINK_NO_END = {
+    "output": "<think>This is a reasoning section",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY = {
+    "output": "",
+    "reasoning_content": "",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning_content": None,
+    "content": None,
+    "is_reasoning_end": False,
+}
+NEW_LINE = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
+# Streaming cannot handle new lines at the beginning of the output
+# because we need to support <think>...</think> and ...</think>
+# We cannot know if the text before <think> is reasoning content
+# or not.
+NEW_LINE_STREAMING = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "\nThis is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
 
 TEST_CASES = [
     pytest.param(
@@ -182,6 +216,36 @@ def deepseek_r1_qwen_tokenizer():
         SHORTEST_REASONING_WITH_THINK,
         id="shortest_with_think_streaming",
     ),
+    pytest.param(
+        False,
+        THINK_NO_END,
+        id="think_no_end",
+    ),
+    pytest.param(
+        True,
+        THINK_NO_END,
+        id="think_no_end_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
+    pytest.param(
+        False,
+        NEW_LINE,
+        id="new_line",
+    ),
+    pytest.param(
+        True,
+        NEW_LINE_STREAMING,
+        id="new_line_streaming",
+    ),
 ]
 
 
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 73be6d4d1ab1..1c283c092a28 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import re
 from collections.abc import Sequence
 from typing import Optional, Union
 
@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
     def __init__(self, tokenizer: PreTrainedTokenizerBase):
         super().__init__(tokenizer)
 
-        self.reasoning_regex = re.compile(
-            rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
-
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ReasoningParser "
@@ -143,23 +139,34 @@ def extract_reasoning_content_streaming(
     def extract_reasoning_content(
             self, model_output: str, request: ChatCompletionRequest
     ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
+
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+
+        # Check if the start token is present in the model output, remove it
+        # if it is present.
+        model_output_parts = model_output.partition(self.start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
+
         # DeepSeek R1 doesn't generate <think> now.
         # Thus we assume the reasoning content is always at the start.
         # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
         if self.end_token not in model_output:
             return model_output, None
         else:
-            # Add a start token if it's missing to keep compatibility.
-            if self.start_token not in model_output:
-                model_output = f"{self.start_token}{model_output}"
-            # Use a regex to find the reasoning content
-            reasoning_content = self.reasoning_regex.findall(model_output)[0]
-
-            end_index = len(
-                f"{self.start_token}{reasoning_content}{self.end_token}")
-            final_output = model_output[end_index:]
-
-            if len(final_output) == 0:
-                return reasoning_content, None
-
-            return reasoning_content, final_output
+            reasoning_content, _, content = model_output.partition(
+                self.end_token)
+            # If the end token is not found, return the model output as is.
+            # It should not happen since we already checked for the presence
+            # of the end token.
+            # If generation stops right after end-of-think, return null content
+            final_content = content or None
+            return reasoning_content, final_content
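
For illustration only (not part of the patch): a minimal standalone sketch of the partition-based extraction introduced above, assuming "<think>" / "</think>" as the start and end tokens. The helper name `_extract` is hypothetical; the real logic lives in DeepSeekR1ReasoningParser.extract_reasoning_content.

# Hypothetical sketch of the new non-streaming extraction behavior.
def _extract(model_output: str,
             start_token: str = "<think>",
             end_token: str = "</think>"):
    # Drop the start token if present; reasoning is assumed to lead the output.
    _, sep, after = model_output.partition(start_token)
    model_output = after if sep else model_output
    if end_token not in model_output:
        # No end token: the whole output is treated as reasoning content.
        return model_output, None
    reasoning_content, _, content = model_output.partition(end_token)
    # Empty trailing content becomes None (generation stopped at end-of-think).
    return reasoning_content, content or None

assert _extract("<think>abc</think>xyz") == ("abc", "xyz")
assert _extract("<think>abc") == ("abc", None)   # mirrors the THINK_NO_END case
assert _extract("abc</think>") == ("abc", None)  # empty content collapses to None
assert _extract("") == ("", None)                # mirrors the EMPTY case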