diff --git a/examples/reasoning_content/__init__.py b/examples/reasoning_content/__init__.py
new file mode 100644
index 000000000..f24b2606d
--- /dev/null
+++ b/examples/reasoning_content/__init__.py
@@ -0,0 +1,3 @@
+"""
+Examples demonstrating how to use models that provide reasoning content.
+"""
diff --git a/examples/reasoning_content/main.py b/examples/reasoning_content/main.py
new file mode 100644
index 000000000..54c9155ec
--- /dev/null
+++ b/examples/reasoning_content/main.py
@@ -0,0 +1,123 @@
+"""
+Example demonstrating how to use the reasoning content feature with models that support it.
+
+Some models, such as deepseek-reasoner, provide a reasoning_content field in addition to the
+regular content. This example shows how to access and use this reasoning content in both
+streaming and non-streaming responses.
+
+To run this example, you need to:
+1. Set your OPENAI_API_KEY environment variable
+2. Use a model that supports reasoning content (e.g., deepseek-reasoner)
+"""
+
+import asyncio
+
+from openai.types.responses import ResponseOutputRefusal, ResponseOutputText
+
+from agents import ModelSettings
+from agents.models.interface import ModelTracing
+from agents.models.openai_provider import OpenAIProvider
+
+# Replace this with a model that supports reasoning content (e.g., deepseek-reasoner)
+# For demonstration purposes, we'll use a placeholder model name
+MODEL_NAME = "deepseek-reasoner"
+
+
+async def stream_with_reasoning_content():
+    """
+    Example of streaming a response from a model that provides reasoning content.
+    The reasoning content is emitted as separate events.
+    """
+    provider = OpenAIProvider()
+    model = provider.get_model(MODEL_NAME)
+
+    print("\n=== Streaming Example ===")
+    print("Prompt: Write a haiku about recursion in programming")
+
+    reasoning_content = ""
+    regular_content = ""
+
+    async for event in model.stream_response(
+        system_instructions="You are a helpful assistant that writes creative content.",
+        input="Write a haiku about recursion in programming",
+        model_settings=ModelSettings(),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
+    ):
+        if event.type == "response.reasoning_summary_text.delta":
+            print(
+                f"\033[33m{event.delta}\033[0m", end="", flush=True
+            )  # Yellow for reasoning content
+            reasoning_content += event.delta
+        elif event.type == "response.output_text.delta":
+            print(f"\033[32m{event.delta}\033[0m", end="", flush=True)  # Green for regular content
+            regular_content += event.delta
+
+    print("\n\nReasoning Content:")
+    print(reasoning_content)
+    print("\nRegular Content:")
+    print(regular_content)
+    print("\n")
+
+
+async def get_response_with_reasoning_content():
+    """
+    Example of getting a complete response from a model that provides reasoning content.
+    The reasoning content is available as a separate item in the response output.
+    """
+    provider = OpenAIProvider()
+    model = provider.get_model(MODEL_NAME)
+
+    print("\n=== Non-streaming Example ===")
+    print("Prompt: Explain the concept of recursion in programming")
+
+    response = await model.get_response(
+        system_instructions="You are a helpful assistant that explains technical concepts clearly.",
+        input="Explain the concept of recursion in programming",
+        model_settings=ModelSettings(),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
+    )
+
+    # Extract reasoning content and regular content from the response
+    reasoning_content = None
+    regular_content = None
+
+    for item in response.output:
+        if hasattr(item, "type") and item.type == "reasoning":
+            reasoning_content = item.summary[0].text
+        elif hasattr(item, "type") and item.type == "message":
+            if item.content and len(item.content) > 0:
+                content_item = item.content[0]
+                if isinstance(content_item, ResponseOutputText):
+                    regular_content = content_item.text
+                elif isinstance(content_item, ResponseOutputRefusal):
+                    regular_content = content_item.refusal
+
+    print("\nReasoning Content:")
+    print(reasoning_content or "No reasoning content provided")
+
+    print("\nRegular Content:")
+    print(regular_content or "No regular content provided")
+
+    print("\n")
+
+
+async def main():
+    try:
+        await stream_with_reasoning_content()
+        await get_response_with_reasoning_content()
+    except Exception as e:
+        print(f"Error: {e}")
+        print("\nNote: This example requires a model that supports reasoning content.")
+        print("You may need to use a specific model like deepseek-reasoner or similar.")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
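Note: with a reasoning-capable model, the streaming loop in main.py above sees the new events interleaved with the existing ones. Based on the chatcmpl_stream_handler.py changes later in this patch, the rough order is as follows (sequence numbers and chunk boundaries will vary by model):

    response.output_item.added               # reasoning item, output_index 0
    response.reasoning_summary_part.added
    response.reasoning_summary_text.delta    # repeated, one per reasoning chunk
    response.output_item.added               # assistant message, output_index 1
    response.content_part.added
    response.output_text.delta               # repeated, one per content chunk
    response.reasoning_summary_part.done
    response.output_item.done                # reasoning item
    response.content_part.done
    response.output_item.done                # assistant message
    response.completed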
+ """ + provider = OpenAIProvider() + model = provider.get_model(MODEL_NAME) + + print("\n=== Non-streaming Example ===") + print("Prompt: Explain the concept of recursion in programming") + + response = await model.get_response( + system_instructions="You are a helpful assistant that explains technical concepts clearly.", + input="Explain the concept of recursion in programming", + model_settings=ModelSettings(), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + ) + + # Extract reasoning content and regular content from the response + reasoning_content = None + regular_content = None + + for item in response.output: + if hasattr(item, "type") and item.type == "reasoning": + reasoning_content = item.summary[0].text + elif hasattr(item, "type") and item.type == "message": + if item.content and len(item.content) > 0: + content_item = item.content[0] + if isinstance(content_item, ResponseOutputText): + regular_content = content_item.text + elif isinstance(content_item, ResponseOutputRefusal): + refusal_item = cast(Any, content_item) + regular_content = refusal_item.refusal + + print("\nReasoning Content:") + print(reasoning_content or "No reasoning content provided") + + print("\nRegular Content:") + print(regular_content or "No regular content provided") + + print("\n") + + +async def main(): + try: + await stream_with_reasoning_content() + await get_response_with_reasoning_content() + except Exception as e: + print(f"Error: {e}") + print("\nNote: This example requires a model that supports reasoning content.") + print("You may need to use a specific model like deepseek-reasoner or similar.") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/reasoning_content/runner_example.py b/examples/reasoning_content/runner_example.py new file mode 100644 index 000000000..723235fc9 --- /dev/null +++ b/examples/reasoning_content/runner_example.py @@ -0,0 +1,89 @@ +""" +Example demonstrating how to use the reasoning content feature with the Runner API. + +This example shows how to extract and use reasoning content from responses when using +the Runner API, which is the most common way users interact with the Agents library. + +To run this example, you need to: +1. Set your OPENAI_API_KEY environment variable +2. Use a model that supports reasoning content (e.g., deepseek-reasoner) +""" + +import asyncio +from typing import Any + +from agents import Agent, Runner, trace +from agents.items import ReasoningItem + +# Replace this with a model that supports reasoning content (e.g., deepseek-reasoner) +# For demonstration purposes, we'll use a placeholder model name +MODEL_NAME = "deepseek-reasoner" + + +async def main(): + print(f"Using model: {MODEL_NAME}") + + # Create an agent with a model that supports reasoning content + agent = Agent( + name="Reasoning Agent", + instructions="You are a helpful assistant that explains your reasoning step by step.", + model=MODEL_NAME, + ) + + # Example 1: Non-streaming response + with trace("Reasoning Content - Non-streaming"): + print("\n=== Example 1: Non-streaming response ===") + result = await Runner.run( + agent, "What is the square root of 841? Please explain your reasoning." 
diff --git a/src/agents/models/chatcmpl_converter.py b/src/agents/models/chatcmpl_converter.py
index 1d599e8c0..25d9f083d 100644
--- a/src/agents/models/chatcmpl_converter.py
+++ b/src/agents/models/chatcmpl_converter.py
@@ -33,8 +33,10 @@
     ResponseOutputMessageParam,
     ResponseOutputRefusal,
     ResponseOutputText,
+    ResponseReasoningItem,
 )
 from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
+from openai.types.responses.response_reasoning_item import Summary
 
 from ..agent_output import AgentOutputSchemaBase
 from ..exceptions import AgentsException, UserError
@@ -85,6 +87,16 @@ def convert_response_format(
     def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TResponseOutputItem]:
         items: list[TResponseOutputItem] = []
 
+        # Handle reasoning content if available
+        if hasattr(message, "reasoning_content") and message.reasoning_content:
+            items.append(
+                ResponseReasoningItem(
+                    id=FAKE_RESPONSES_ID,
+                    summary=[Summary(text=message.reasoning_content, type="summary_text")],
+                    type="reasoning",
+                )
+            )
+
         message_item = ResponseOutputMessage(
             id=FAKE_RESPONSES_ID,
             content=[],
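A minimal sketch of what the converter change above produces, assuming the enclosing class is `Converter` from src/agents/models/chatcmpl_converter.py, and attaching reasoning_content dynamically the same way the tests later in this patch do:

    from openai.types.chat import ChatCompletionMessage

    from agents.models.chatcmpl_converter import Converter

    msg = ChatCompletionMessage(role="assistant", content="The answer is 42")
    msg.reasoning_content = "Let me think..."  # type: ignore[attr-defined]

    items = Converter.message_to_output_items(msg)
    # items[0] is a ResponseReasoningItem with summary[0].text == "Let me think..."
    # items[1] is the usual ResponseOutputMessage carrying the visible text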
diff --git a/src/agents/models/chatcmpl_stream_handler.py b/src/agents/models/chatcmpl_stream_handler.py
index d18f5912a..0cf1e6a3e 100644
--- a/src/agents/models/chatcmpl_stream_handler.py
+++ b/src/agents/models/chatcmpl_stream_handler.py
@@ -20,21 +20,38 @@
     ResponseOutputMessage,
     ResponseOutputRefusal,
     ResponseOutputText,
+    ResponseReasoningItem,
+    ResponseReasoningSummaryPartAddedEvent,
+    ResponseReasoningSummaryPartDoneEvent,
+    ResponseReasoningSummaryTextDeltaEvent,
     ResponseRefusalDeltaEvent,
     ResponseTextDeltaEvent,
     ResponseUsage,
 )
+from openai.types.responses.response_reasoning_item import Summary
+from openai.types.responses.response_reasoning_summary_part_added_event import (
+    Part as AddedEventPart,
+)
+from openai.types.responses.response_reasoning_summary_part_done_event import Part as DoneEventPart
 from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from ..items import TResponseStreamEvent
 from .fake_id import FAKE_RESPONSES_ID
 
 
 @dataclass
 class StreamingState:
     started: bool = False
     text_content_index_and_output: tuple[int, ResponseOutputText] | None = None
     refusal_content_index_and_output: tuple[int, ResponseOutputRefusal] | None = None
+    reasoning_content_index_and_output: tuple[int, ResponseReasoningItem] | None = None
     function_calls: dict[int, ResponseFunctionToolCall] = field(default_factory=dict)
@@ -75,12 +92,65 @@ async def handle_stream(
 
             delta = chunk.choices[0].delta
 
-            # Handle text
-            if delta.content:
+            # Handle reasoning content
+            if hasattr(delta, "reasoning_content"):
+                reasoning_content = delta.reasoning_content
+                if reasoning_content and not state.reasoning_content_index_and_output:
+                    state.reasoning_content_index_and_output = (
+                        0,
+                        ResponseReasoningItem(
+                            id=FAKE_RESPONSES_ID,
+                            summary=[Summary(text="", type="summary_text")],
+                            type="reasoning",
+                        ),
+                    )
+                    yield ResponseOutputItemAddedEvent(
+                        item=ResponseReasoningItem(
+                            id=FAKE_RESPONSES_ID,
+                            summary=[Summary(text="", type="summary_text")],
+                            type="reasoning",
+                        ),
+                        output_index=0,
+                        type="response.output_item.added",
+                        sequence_number=sequence_number.get_and_increment(),
+                    )
+
+                    yield ResponseReasoningSummaryPartAddedEvent(
+                        item_id=FAKE_RESPONSES_ID,
+                        output_index=0,
+                        summary_index=0,
+                        part=AddedEventPart(text="", type="summary_text"),
+                        type="response.reasoning_summary_part.added",
+                        sequence_number=sequence_number.get_and_increment(),
+                    )
+
+                if reasoning_content and state.reasoning_content_index_and_output:
+                    yield ResponseReasoningSummaryTextDeltaEvent(
+                        delta=reasoning_content,
+                        item_id=FAKE_RESPONSES_ID,
+                        output_index=0,
+                        summary_index=0,
+                        type="response.reasoning_summary_text.delta",
+                        sequence_number=sequence_number.get_and_increment(),
+                    )
+
+                    # Accumulate the reasoning text in a fresh Summary object
+                    current_summary = state.reasoning_content_index_and_output[1].summary[0]
+                    updated_text = current_summary.text + reasoning_content
+                    new_summary = Summary(text=updated_text, type="summary_text")
+                    state.reasoning_content_index_and_output[1].summary[0] = new_summary
+
+            # Handle regular content
+            if delta.content is not None:
                 if not state.text_content_index_and_output:
-                    # Initialize a content tracker for streaming text
+                    # The text part index accounts for any reasoning item and
+                    # refusal part already tracked in this response
+                    content_index = 0
+                    if state.reasoning_content_index_and_output:
+                        content_index += 1
+                    if state.refusal_content_index_and_output:
+                        content_index += 1
                     state.text_content_index_and_output = (
-                        0 if not state.refusal_content_index_and_output else 1,
+                        content_index,
                         ResponseOutputText(
                             text="",
                             type="output_text",
@@ -98,14 +168,16 @@ async def handle_stream(
                     # Notify consumers of the start of a new output message + first content part
                     yield ResponseOutputItemAddedEvent(
                         item=assistant_item,
-                        output_index=0,
+                        output_index=1 if state.reasoning_content_index_and_output else 0,
                         type="response.output_item.added",
                         sequence_number=sequence_number.get_and_increment(),
                     )
                     yield ResponseContentPartAddedEvent(
                         content_index=state.text_content_index_and_output[0],
                         item_id=FAKE_RESPONSES_ID,
-                        output_index=0,
+                        output_index=1 if state.reasoning_content_index_and_output else 0,
                         part=ResponseOutputText(
                             text="",
                             type="output_text",
@@ -119,7 +191,8 @@ async def handle_stream(
                     content_index=state.text_content_index_and_output[0],
                     delta=delta.content,
                     item_id=FAKE_RESPONSES_ID,
-                    output_index=0,
+                    output_index=1 if state.reasoning_content_index_and_output else 0,
                     type="response.output_text.delta",
                     sequence_number=sequence_number.get_and_increment(),
                 )
@@ -130,9 +203,14 @@ async def handle_stream(
             # This is always set by the OpenAI API, but not by others e.g. LiteLLM
             if hasattr(delta, "refusal") and delta.refusal:
                 if not state.refusal_content_index_and_output:
-                    # Initialize a content tracker for streaming refusal text
+                    # The refusal part index accounts for any reasoning item and
+                    # text part already tracked in this response
+                    refusal_index = 0
+                    if state.reasoning_content_index_and_output:
+                        refusal_index += 1
+                    if state.text_content_index_and_output:
+                        refusal_index += 1
+
                     state.refusal_content_index_and_output = (
-                        0 if not state.text_content_index_and_output else 1,
+                        refusal_index,
                         ResponseOutputRefusal(refusal="", type="refusal"),
                     )
                 # Start a new assistant message if one doesn't exist yet (in-progress)
@@ -146,14 +224,16 @@ async def handle_stream(
                     # Notify downstream that assistant message + first content part are starting
                     yield ResponseOutputItemAddedEvent(
                         item=assistant_item,
-                        output_index=0,
+                        output_index=1 if state.reasoning_content_index_and_output else 0,
                         type="response.output_item.added",
                         sequence_number=sequence_number.get_and_increment(),
                     )
                     yield ResponseContentPartAddedEvent(
                         content_index=state.refusal_content_index_and_output[0],
                         item_id=FAKE_RESPONSES_ID,
-                        output_index=0,
+                        output_index=1 if state.reasoning_content_index_and_output else 0,
                         part=ResponseOutputText(
                             text="",
                             type="output_text",
@@ -167,7 +247,8 @@ async def handle_stream(
                     content_index=state.refusal_content_index_and_output[0],
                     delta=delta.refusal,
                     item_id=FAKE_RESPONSES_ID,
-                    output_index=0,
+                    output_index=1 if state.reasoning_content_index_and_output else 0,
                     type="response.refusal.delta",
                     sequence_number=sequence_number.get_and_increment(),
                 )
@@ -197,14 +278,37 @@ async def handle_stream(
                 ) or ""
                 state.function_calls[tc_delta.index].call_id += tc_delta.id or ""
 
+        if state.reasoning_content_index_and_output:
+            yield ResponseReasoningSummaryPartDoneEvent(
+                item_id=FAKE_RESPONSES_ID,
+                output_index=0,
+                summary_index=0,
+                part=DoneEventPart(
+                    text=state.reasoning_content_index_and_output[1].summary[0].text,
+                    type="summary_text",
+                ),
+                type="response.reasoning_summary_part.done",
+                sequence_number=sequence_number.get_and_increment(),
+            )
+            yield ResponseOutputItemDoneEvent(
+                item=state.reasoning_content_index_and_output[1],
+                output_index=0,
+                type="response.output_item.done",
+                sequence_number=sequence_number.get_and_increment(),
+            )
+
         function_call_starting_index = 0
+        if state.reasoning_content_index_and_output:
+            function_call_starting_index += 1
+
         if state.text_content_index_and_output:
             function_call_starting_index += 1
             # Send end event for this content part
             yield ResponseContentPartDoneEvent(
                 content_index=state.text_content_index_and_output[0],
                 item_id=FAKE_RESPONSES_ID,
-                output_index=0,
+                output_index=1 if state.reasoning_content_index_and_output else 0,
                 part=state.text_content_index_and_output[1],
                 type="response.content_part.done",
                 sequence_number=sequence_number.get_and_increment(),
             )
@@ -216,7 +320,8 @@ async def handle_stream(
             yield ResponseContentPartDoneEvent(
                 content_index=state.refusal_content_index_and_output[0],
                 item_id=FAKE_RESPONSES_ID,
-                output_index=0,
+                output_index=1 if state.reasoning_content_index_and_output else 0,
                 part=state.refusal_content_index_and_output[1],
                 type="response.content_part.done",
                 sequence_number=sequence_number.get_and_increment(),
@@ -261,6 +366,12 @@ async def handle_stream(
 
         # Finally, send the Response completed event
         outputs: list[ResponseOutputItem] = []
+
+        # include the reasoning item if it exists
+        if state.reasoning_content_index_and_output:
+            outputs.append(state.reasoning_content_index_and_output[1])
+
+        # include text or refusal content if they exist
         if state.text_content_index_and_output or state.refusal_content_index_and_output:
             assistant_msg = ResponseOutputMessage(
                 id=FAKE_RESPONSES_ID,
@@ -278,7 +389,8 @@ async def handle_stream(
             # send a ResponseOutputItemDone for the assistant message
             yield ResponseOutputItemDoneEvent(
                 item=assistant_msg,
-                output_index=0,
+                output_index=1 if state.reasoning_content_index_and_output else 0,
                 type="response.output_item.done",
                 sequence_number=sequence_number.get_and_increment(),
             )
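The net effect of the stream-handler changes: when reasoning content is present, the reasoning item always occupies output_index 0 and the assistant message shifts to output_index 1, so a completed response looks roughly like this:

    # response.output == [
    #     ResponseReasoningItem(summary=[Summary(text="<reasoning>", type="summary_text")], ...),  # index 0
    #     ResponseOutputMessage(content=[ResponseOutputText(text="<answer>", ...)], ...),          # index 1
    # ]

The tests below assert exactly this two-item shape for both the streaming and non-streaming paths.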
diff --git a/tests/test_reasoning_content.py b/tests/test_reasoning_content.py
new file mode 100644
index 000000000..200989bd6
--- /dev/null
+++ b/tests/test_reasoning_content.py
@@ -0,0 +1,235 @@
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage
+from openai.types.chat.chat_completion_chunk import Choice
+from openai.types.completion_usage import (
+    CompletionTokensDetails,
+    CompletionUsage,
+    PromptTokensDetails,
+)
+from openai.types.responses import (
+    Response,
+    ResponseOutputMessage,
+    ResponseOutputText,
+    ResponseReasoningItem,
+)
+
+from agents.model_settings import ModelSettings
+from agents.models.interface import ModelTracing
+from agents.models.openai_chatcompletions import OpenAIChatCompletionsModel
+from agents.models.openai_provider import OpenAIProvider
+
+
+# Define our own ChoiceDelta so the tests can attach a `reasoning_content`
+# attribute to it dynamically
+class ChoiceDelta:
+    def __init__(self, content=None, role=None, function_call=None, tool_calls=None):
+        self.content = content
+        self.role = role
+        self.function_call = function_call
+        self.tool_calls = tool_calls
+        # The reasoning_content attribute is added dynamically in the tests below
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_stream_response_yields_events_for_reasoning_content(monkeypatch) -> None:
+    """
+    Validate that when a model streams reasoning content,
+    `stream_response` emits the appropriate sequence of events, including
+    `response.reasoning_summary_text.delta` events for each chunk of the reasoning content,
+    and constructs a completed response with a `ResponseReasoningItem` part.
+ """ + # Simulate reasoning content coming in two pieces + chunk1 = ChatCompletionChunk( + id="chunk-id", + created=1, + model="fake", + object="chat.completion.chunk", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(content=None, role=None, function_call=None, tool_calls=None), # type: ignore + ) + ], + ) + chunk1.choices[0].delta.reasoning_content = "Let me think" # type: ignore[attr-defined] + + chunk2 = ChatCompletionChunk( + id="chunk-id", + created=1, + model="fake", + object="chat.completion.chunk", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(content=None, role=None, function_call=None, tool_calls=None), # type: ignore + ) + ], + ) + chunk2.choices[0].delta.reasoning_content = " about this" # type: ignore[attr-defined] + + # Then regular content in two pieces + chunk3 = ChatCompletionChunk( + id="chunk-id", + created=1, + model="fake", + object="chat.completion.chunk", + choices=[ + Choice( + index=0, + delta=ChoiceDelta( + content="The answer", role=None, function_call=None, tool_calls=None # type: ignore + ), + ) + ], + ) + + chunk4 = ChatCompletionChunk( + id="chunk-id", + created=1, + model="fake", + object="chat.completion.chunk", + choices=[ + Choice( + index=0, + delta=ChoiceDelta(content=" is 42", role=None, function_call=None, tool_calls=None), # type: ignore + ) + ], + usage=CompletionUsage( + completion_tokens=4, + prompt_tokens=2, + total_tokens=6, + completion_tokens_details=CompletionTokensDetails(reasoning_tokens=2), + prompt_tokens_details=PromptTokensDetails(cached_tokens=0), + ), + ) + + async def fake_stream(): + for c in (chunk1, chunk2, chunk3, chunk4): + yield c + + async def patched_fetch_response(self, *args, **kwargs): + resp = Response( + id="resp-id", + created_at=0, + model="fake-model", + object="response", + output=[], + tool_choice="none", + tools=[], + parallel_tool_calls=False, + ) + return resp, fake_stream() + + monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", patched_fetch_response) + model = OpenAIProvider(use_responses=False).get_model("gpt-4") + output_events = [] + async for event in model.stream_response( + system_instructions=None, + input="", + model_settings=ModelSettings(), + tools=[], + output_schema=None, + handoffs=[], + tracing=ModelTracing.DISABLED, + previous_response_id=None, + ): + output_events.append(event) + + # Verify reasoning content events were emitted + reasoning_delta_events = [ + e for e in output_events if e.type == "response.reasoning_summary_text.delta" + ] + assert len(reasoning_delta_events) == 2 + assert reasoning_delta_events[0].delta == "Let me think" + assert reasoning_delta_events[1].delta == " about this" + + # Verify regular content events were emitted + content_delta_events = [e for e in output_events if e.type == "response.output_text.delta"] + assert len(content_delta_events) == 2 + assert content_delta_events[0].delta == "The answer" + assert content_delta_events[1].delta == " is 42" + + # Verify the final response contains both types of content + response_event = output_events[-1] + assert response_event.type == "response.completed" + assert len(response_event.response.output) == 2 + + # First item should be reasoning + assert isinstance(response_event.response.output[0], ResponseReasoningItem) + assert response_event.response.output[0].summary[0].text == "Let me think about this" + + # Second item should be message with text + assert isinstance(response_event.response.output[1], ResponseOutputMessage) + assert isinstance(response_event.response.output[1].content[0], 
+    assert isinstance(response_event.response.output[1].content[0], ResponseOutputText)
+    assert response_event.response.output[1].content[0].text == "The answer is 42"
+
+
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_get_response_with_reasoning_content(monkeypatch) -> None:
+    """
+    Test that when a model returns reasoning content in addition to regular content,
+    `get_response` properly includes both in the response output.
+    """
+    # Create a mock completion with reasoning content
+    msg = ChatCompletionMessage(
+        role="assistant",
+        content="The answer is 42",
+    )
+    # Add reasoning_content attribute dynamically
+    msg.reasoning_content = "Let me think about this question carefully"  # type: ignore[attr-defined]
+
+    # Using a dict directly to avoid type errors
+    mock_choice: dict[str, Any] = {
+        "index": 0,
+        "finish_reason": "stop",
+        "message": msg,
+        "delta": None,
+    }
+
+    chat = ChatCompletion(
+        id="resp-id",
+        created=0,
+        model="fake",
+        object="chat.completion",
+        choices=[mock_choice],  # type: ignore[list-item]
+        usage=CompletionUsage(
+            completion_tokens=10,
+            prompt_tokens=5,
+            total_tokens=15,
+            completion_tokens_details=CompletionTokensDetails(reasoning_tokens=6),
+            prompt_tokens_details=PromptTokensDetails(cached_tokens=0),
+        ),
+    )
+
+    async def patched_fetch_response(self, *args, **kwargs):
+        return chat
+
+    monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", patched_fetch_response)
+    model = OpenAIProvider(use_responses=False).get_model("gpt-4")
+    resp = await model.get_response(
+        system_instructions=None,
+        input="",
+        model_settings=ModelSettings(),
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
+    )
+
+    # Should have produced a reasoning item and a message with text content
+    assert len(resp.output) == 2
+
+    # First output should be the reasoning item
+    assert isinstance(resp.output[0], ResponseReasoningItem)
+    assert resp.output[0].summary[0].text == "Let me think about this question carefully"
+
+    # Second output should be the message with text content
+    assert isinstance(resp.output[1], ResponseOutputMessage)
+    assert isinstance(resp.output[1].content[0], ResponseOutputText)
+    assert resp.output[1].content[0].text == "The answer is 42"
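To run just these tests (standard pytest invocation; the async tests rely on the pytest-asyncio marker already used across this repo's test suite):

    pytest tests/test_reasoning_content.py -v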