@@ -211,11 +211,19 @@ async def test_function_tool_use(client: openai.AsyncOpenAI, model_name: str,
             })
 
         output = []
+        reasoning = []
         async for chunk in output_stream:
-            if chunk.choices and chunk.choices[0].delta.tool_calls:
-                output.extend(chunk.choices[0].delta.tool_calls)
+            if chunk.choices:
+                if enable_thinking and \
+                        getattr(chunk.choices[0].delta,
+                                "reasoning_content", None):
+                    reasoning.append(chunk.choices[0].delta.reasoning_content)
+                if chunk.choices[0].delta.tool_calls:
+                    output.extend(chunk.choices[0].delta.tool_calls)
 
         assert len(output) > 0
+        if enable_thinking:
+            assert len(reasoning) > 0
 
 
 @pytest.fixture(scope="module")
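The updated test collects reasoning_content deltas alongside tool-call deltas whenever enable_thinking is set. For reference, a minimal client-side sketch of the same consumption pattern; this assumes a vLLM OpenAI-compatible server on localhost:8000 serving some reasoning-capable model behind a reasoning parser (the model name and tool schema below are illustrative, and reasoning_content is a vLLM extension field, hence the defensive getattr):

import asyncio

import openai


async def main() -> None:
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="EMPTY")
    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical tool for illustration
            "description": "Get the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]
    stream = await client.chat.completions.create(
        model="Qwen/Qwen3-8B",  # assumption: any model with a reasoning parser
        messages=[{"role": "user", "content": "Weather in Paris?"}],
        tools=tools,
        tool_choice="required",
        stream=True,
    )
    async for chunk in stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        # vLLM-specific field; absent on vanilla OpenAI deltas.
        if getattr(delta, "reasoning_content", None):
            print("[reasoning]", delta.reasoning_content)
        if delta.tool_calls:
            print("[tool call delta]", delta.tool_calls)


asyncio.run(main())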
vllm/entrypoints/openai/serving_chat.py (58 changes: 40 additions & 18 deletions)
@@ -520,8 +520,6 @@ async def chat_completion_stream_generator(
                 # For reasoning parser and tool call all enabled
                 added_content_delta_arr = [False] * num_choices
                 reasoning_end_arr = [False] * num_choices
-            elif request.tool_choice == "required":
-                all_previous_token_ids = None
             else:
                 all_previous_token_ids = None

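The deleted elif made the "required" path drop per-choice token-ID tracking; reasoning-end detection now needs that state, so "required" falls through to the same initialization as the reasoning case. A rough sketch of the per-choice bookkeeping this implies (names follow the diff; the exact types in serving_chat.py may differ):

num_choices = 2  # illustration only

previous_texts = [""] * num_choices              # visible text accumulated so far
all_previous_token_ids = [[] for _ in range(num_choices)]  # now kept for "required" too
reasoning_end_arr = [False] * num_choices        # True once choice i left its reasoning block
added_content_delta_arr = [False] * num_choices  # first-content-delta bookkeeping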
@@ -807,26 +805,50 @@ async def chat_completion_stream_generator(
                     previous_text = previous_texts[i]
                     current_text = previous_text + delta_text
                     fn_name_returned = function_name_returned[i]
+                    output_token_ids = as_list(output.token_ids)
 
-                    if self.reasoning_parser:
-                        _, content = \
-                            reasoning_parser.extract_reasoning_content(
+                    if self.reasoning_parser is not None and \
+                            not reasoning_end_arr[i] and \
+                            res.prompt_token_ids and \
+                            reasoning_parser.is_reasoning_end(
+                                res.prompt_token_ids):
+                        reasoning_end_arr[i] = True
+
+                    if self.reasoning_parser and not reasoning_end_arr[i]:
+                        delta_message = (
+                            reasoning_parser.
+                            extract_reasoning_content_streaming(
+                                previous_text,
                                 current_text,
-                                request
-                            )
+                                delta_text,
+                                previous_token_ids,
+                                current_token_ids,
+                                output_token_ids,
+                            ))
+                        if reasoning_parser.is_reasoning_end(
+                                output_token_ids):
+                            reasoning_end_arr[i] = True
+                            if delta_message and delta_message.content:
+                                current_text = delta_message.content
+                                delta_message.content = None
+                            else:
+                                # reasoning ended
+                                current_text = ""
                     else:
+                        # either finished reasoning or no reasoning at all
                         content = current_text
+                        delta_message, function_name_returned[i] = (
+                            self.extract_tool_call_required_streaming(
+                                previous_text=previous_text,
+                                current_text=content,
+                                delta_text=delta_text,
+                                function_name_returned=fn_name_returned,
+                                tool_call_idx=history_tool_call_cnt))
+                        if (delta_message and delta_message.tool_calls and
+                                delta_message.tool_calls[0].id is not None):
+                            history_tool_call_cnt += 1
+                            tools_streamed[i] = True
 
-                    delta_message, function_name_returned[i] = (
-                        self.extract_tool_call_required_streaming(
-                            previous_text=previous_text,
-                            current_text=content,
-                            delta_text=delta_text,
-                            function_name_returned=fn_name_returned,
-                            tool_call_idx=history_tool_call_cnt))
-                    if (delta_message and delta_message.tool_calls
-                            and delta_message.tool_calls[0].id
-                            is not None):
-                        history_tool_call_cnt += 1
-                        tools_streamed[i] = True
 
                 # handle streaming deltas for tools with "auto" tool choice
                 # and reasoning parser
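The net effect of this hunk: while a choice is still inside its reasoning block, deltas flow through the reasoning parser; once is_reasoning_end fires (whether already in the prompt tokens or in the generated tokens), all further text is routed to extract_tool_call_required_streaming instead. A self-contained sketch of that gating pattern under toy assumptions (a literal "</think>" marker with a made-up token id; vLLM's real ReasoningParser operates on the text and token-ID arguments shown in the diff):

from dataclasses import dataclass
from typing import Optional

THINK_END_ID = 151668  # assumption: stand-in id for the </think> token


@dataclass
class RoutedDelta:
    reasoning: Optional[str] = None
    tool_text: Optional[str] = None


class RequiredToolGate:
    """Route streamed deltas: reasoning first, tool-call text afterwards."""

    def __init__(self) -> None:
        self.reasoning_end = False

    def route(self, delta_text: str,
              output_token_ids: list[int]) -> RoutedDelta:
        if not self.reasoning_end:
            if THINK_END_ID in output_token_ids:
                # Reasoning ends inside this chunk: text after the marker
                # already belongs to the tool call (mirrors the
                # current_text = delta_message.content stash in the diff).
                self.reasoning_end = True
                head, _, tail = delta_text.partition("</think>")
                return RoutedDelta(reasoning=head or None,
                                   tool_text=tail or None)
            return RoutedDelta(reasoning=delta_text)
        # Either reasoning finished earlier or there was none at all.
        return RoutedDelta(tool_text=delta_text)


gate = RequiredToolGate()
chunks = [("Let me think", [10]),
          ("done</think>{\"city\": ", [THINK_END_ID]),
          ("\"Paris\"}", [11])]
for text, ids in chunks:
    print(gate.route(text, ids))

The per-choice reasoning_end_arr in the real code plays the role of this class's reasoning_end flag, one slot per streamed choice.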