From a11836e7ad479f91ab49d476742d0f701e0d308d Mon Sep 17 00:00:00 2001
From: Stefano Amorelli <stefano@amorelli.tech>
Date: Sun, 25 May 2025 18:34:14 +0300
Subject: [PATCH] feat(summarizing_conversation_manager): implement
 summarization strategy

---
 src/strands/agent/__init__.py                 |   8 +-
 .../agent/conversation_manager/__init__.py    |  10 +-
 .../summarizing_conversation_manager.py       | 222 +++++++
 ...rizing_conversation_manager_integration.py | 374 ++++++++++++
 .../test_summarizing_conversation_manager.py  | 566 ++++++++++++++++++
 5 files changed, 1178 insertions(+), 2 deletions(-)
 create mode 100644 src/strands/agent/conversation_manager/summarizing_conversation_manager.py
 create mode 100644 tests-integ/test_summarizing_conversation_manager_integration.py
 create mode 100644 tests/strands/agent/test_summarizing_conversation_manager.py

diff --git a/src/strands/agent/__init__.py b/src/strands/agent/__init__.py
index 4d2fa1fe5..6618d3328 100644
--- a/src/strands/agent/__init__.py
+++ b/src/strands/agent/__init__.py
@@ -8,7 +8,12 @@
 
 from .agent import Agent
 from .agent_result import AgentResult
-from .conversation_manager import ConversationManager, NullConversationManager, SlidingWindowConversationManager
+from .conversation_manager import (
+    ConversationManager,
+    NullConversationManager,
+    SlidingWindowConversationManager,
+    SummarizingConversationManager,
+)
 
 __all__ = [
     "Agent",
@@ -16,4 +21,5 @@
     "ConversationManager",
     "NullConversationManager",
     "SlidingWindowConversationManager",
+    "SummarizingConversationManager",
 ]
diff --git a/src/strands/agent/conversation_manager/__init__.py b/src/strands/agent/conversation_manager/__init__.py
index 685418775..c59623215 100644
--- a/src/strands/agent/conversation_manager/__init__.py
+++ b/src/strands/agent/conversation_manager/__init__.py
@@ -6,6 +6,8 @@
 - NullConversationManager: A no-op implementation that does not modify conversation history
 - SlidingWindowConversationManager: An implementation that maintains a sliding window of messages to control context
   size while preserving conversation coherence
+- SummarizingConversationManager: An implementation that summarizes older context instead
+  of simply trimming it
 
 Conversation managers help control memory usage and context length while maintaining relevant conversation state, which
 is critical for effective agent interactions.
@@ -14,5 +16,11 @@
 from .conversation_manager import ConversationManager
 from .null_conversation_manager import NullConversationManager
 from .sliding_window_conversation_manager import SlidingWindowConversationManager
+from .summarizing_conversation_manager import SummarizingConversationManager
 
-__all__ = ["ConversationManager", "NullConversationManager", "SlidingWindowConversationManager"]
+__all__ = [
+    "ConversationManager",
+    "NullConversationManager",
+    "SlidingWindowConversationManager",
+    "SummarizingConversationManager",
+]
diff --git a/src/strands/agent/conversation_manager/summarizing_conversation_manager.py b/src/strands/agent/conversation_manager/summarizing_conversation_manager.py
new file mode 100644
index 000000000..a6b112dd6
--- /dev/null
+++ b/src/strands/agent/conversation_manager/summarizing_conversation_manager.py
@@ -0,0 +1,222 @@
+"""Summarizing conversation history management with configurable options."""
+
+import logging
+from typing import TYPE_CHECKING, List, Optional
+
+from ...types.content import Message
+from ...types.exceptions import ContextWindowOverflowException
+from .conversation_manager import ConversationManager
+
+if TYPE_CHECKING:
+    from ..agent import Agent
+
+
+logger = logging.getLogger(__name__)
+
+# Fallback summarizer system prompt. NOTE(review): the lone "*" bullet in the example looks stray - confirm.
+DEFAULT_SUMMARIZATION_PROMPT = """You are a conversation summarizer. Provide a concise summary of the conversation \
+history.
+
+Format Requirements:
+- You MUST create a structured and concise summary in bullet-point format.
+- You MUST NOT respond conversationally.
+- You MUST NOT address the user directly.
+
+Task:
+Your task is to create a structured summary document:
+- It MUST contain bullet points with key topics and questions covered
+- It MUST contain bullet points for all significant tools executed and their results
+- It MUST contain bullet points for any code or technical information shared
+- It MUST contain a section of key insights gained
+- It MUST format the summary in the third person
+
+Example format:
+
+## Conversation Summary
+* Topic 1: Key information
+* Topic 2: Key information
+*
+## Tools Executed
+* Tool X: Result Y"""
+
+
+class SummarizingConversationManager(ConversationManager):
+    """Conversation manager that condenses the oldest messages into a summary.
+
+    When the context overflows, the oldest messages are replaced by a single
+    agent-generated summary message instead of being dropped, so key information
+    survives while the history shrinks. Nothing is summarized proactively.
+    """
+
+    def __init__(
+        self,
+        summary_ratio: float = 0.3,
+        preserve_recent_messages: int = 10,
+        summarization_agent: Optional["Agent"] = None,
+        summarization_system_prompt: Optional[str] = None,
+    ):
+        """Initialize the summarizing conversation manager.
+
+        Args:
+            summary_ratio: Fraction of the history (oldest first) to summarize on context overflow.
+                Values outside [0.1, 0.8] are clamped to that range. Defaults to 0.3.
+            preserve_recent_messages: Number of most recent messages kept out of the summary; the
+                tool-pair split adjustment may still cut into them. Defaults to 10 messages.
+            summarization_agent: Optional agent to use for summarization instead of the parent agent.
+                If provided, this agent can use tools as part of the summarization process.
+            summarization_system_prompt: Optional system prompt override for summarization; passing it
+                together with summarization_agent raises ValueError. If None, the default prompt is used.
+        """
+        if summarization_agent is not None and summarization_system_prompt is not None:
+            raise ValueError(
+                "Cannot provide both summarization_agent and summarization_system_prompt. "
+                "Agents come with their own system prompt."
+            )
+
+        self.summary_ratio = max(0.1, min(0.8, summary_ratio))
+        self.preserve_recent_messages = preserve_recent_messages
+        self.summarization_agent = summarization_agent
+        self.summarization_system_prompt = summarization_system_prompt
+
+    def apply_management(self, agent: "Agent") -> None:
+        """Apply management strategy to conversation history.
+
+        For the summarizing conversation manager, no proactive management is performed.
+        Summarization only occurs when there's a context overflow that triggers reduce_context.
+
+        Args:
+            agent: The agent whose conversation history will be managed.
+                Unused by this manager; its messages are left untouched.
+        """
+        # No proactive management - summarization only happens on context overflow
+        pass
+
+    def reduce_context(self, agent: "Agent", e: Optional[Exception] = None) -> None:
+        """Replace the oldest messages with a generated summary to shrink the context.
+
+        Args:
+            agent: The agent whose conversation history will be reduced.
+                The agent's messages list is modified in-place.
+            e: The exception that triggered the context reduction, if any; set as the cause of any error raised.
+
+        Raises:
+            ContextWindowOverflowException: If there are too few messages to summarize or no valid split point.
+        """
+        try:
+            # Target count: summary_ratio of the history, but always at least one message
+            messages_to_summarize_count = max(1, int(len(agent.messages) * self.summary_ratio))
+
+            # Never take more than what lies before the preserved recent messages
+            messages_to_summarize_count = min(
+                messages_to_summarize_count, len(agent.messages) - self.preserve_recent_messages
+            )
+
+            if messages_to_summarize_count <= 0:
+                raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")
+
+            # Adjust split point to avoid breaking ToolUse/ToolResult pairs
+            messages_to_summarize_count = self._adjust_split_point_for_tool_pairs(
+                agent.messages, messages_to_summarize_count
+            )
+
+            if messages_to_summarize_count <= 0:
+                raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")
+
+            # Split the history into the part to summarize and the part to keep
+            messages_to_summarize = agent.messages[:messages_to_summarize_count]
+            remaining_messages = agent.messages[messages_to_summarize_count:]
+
+            # Generate summary
+            summary_message = self._generate_summary(messages_to_summarize, agent)
+
+            # Slice-assign so the agent keeps the same list object
+            agent.messages[:] = [summary_message] + remaining_messages
+
+        except Exception as summarization_error:
+            logger.error("Summarization failed: %s", summarization_error)
+            raise summarization_error from e
+
+    def _generate_summary(self, messages: List[Message], agent: "Agent") -> Message:
+        """Generate a summary of the provided messages.
+
+        Args:
+            messages: The messages to summarize; the list itself is not modified.
+            agent: The parent agent, used as the summarizer when no dedicated agent was configured.
+
+        Returns:
+            The message returned by the summarization agent, containing the conversation summary.
+
+        Raises:
+            Exception: If summary generation fails.
+        """
+        # Choose which agent to use for summarization
+        summarization_agent = self.summarization_agent if self.summarization_agent is not None else agent
+
+        # Keep the original prompt and the original list object (not a copy) so restoring preserves identity
+        original_system_prompt = summarization_agent.system_prompt
+        original_messages = summarization_agent.messages
+
+        try:
+            # Only override system prompt if no agent was provided during initialization
+            if self.summarization_agent is None:
+                # Use custom system prompt if provided, otherwise use default
+                system_prompt = (
+                    self.summarization_system_prompt
+                    if self.summarization_system_prompt is not None
+                    else DEFAULT_SUMMARIZATION_PROMPT
+                )
+                # Temporarily set the system prompt for summarization
+                summarization_agent.system_prompt = system_prompt
+            summarization_agent.messages = list(messages)
+
+            # The agent may append to its message list while running; it works on the throwaway copy set above
+            result = summarization_agent("Please summarize this conversation.")
+
+            return result.message
+
+        finally:
+            # Restore original agent state
+            summarization_agent.system_prompt = original_system_prompt
+            summarization_agent.messages = original_messages
+
+    def _adjust_split_point_for_tool_pairs(self, messages: List[Message], split_point: int) -> int:
+        """Adjust the split point to avoid breaking ToolUse/ToolResult pairs.
+
+        Uses the same logic as SlidingWindowConversationManager for consistency.
+
+        Args:
+            messages: The full list of messages.
+            split_point: The initially calculated split point.
+
+        Returns:
+            The adjusted split point (never smaller than split_point) that doesn't break ToolUse/ToolResult pairs.
+
+        Raises:
+            ContextWindowOverflowException: If no valid split point can be found.
+        """
+        if split_point > len(messages):
+            raise ContextWindowOverflowException("Split point exceeds message array length")
+
+        if split_point == len(messages):
+            return split_point
+
+        # Move forward until messages[split_point] is a valid first kept message
+        while split_point < len(messages):
+            if (
+                # The first kept message cannot be a toolResult: its toolUse would fall on the summarized side
+                any("toolResult" in content for content in messages[split_point]["content"])
+                or (
+                    # The first kept message can be a toolUse only if a toolResult immediately follows it.
+                    any("toolUse" in content for content in messages[split_point]["content"])
+                    and split_point + 1 < len(messages)
+                    and not any("toolResult" in content for content in messages[split_point + 1]["content"])
+                )
+            ):
+                split_point += 1
+            else:
+                break
+        else:
+            # If we didn't find a valid split_point, then we throw
+            raise ContextWindowOverflowException("Unable to trim conversation context!")
+
+        return split_point
diff --git a/tests-integ/test_summarizing_conversation_manager_integration.py b/tests-integ/test_summarizing_conversation_manager_integration.py
new file mode 100644
index 000000000..5dcf49443
--- /dev/null
+++ b/tests-integ/test_summarizing_conversation_manager_integration.py
@@ -0,0 +1,374 @@
+"""Integration tests for SummarizingConversationManager with actual AI models.
+
+These tests validate the end-to-end functionality of the SummarizingConversationManager
+by testing with real AI models and API calls. They ensure that:
+
+1. **Real summarization** - Tests that actual model-generated summaries work correctly
+2. **Context overflow handling** - Validates real context overflow scenarios and recovery
+3. **Tool preservation** - Ensures ToolUse/ToolResult pairs survive real summarization
+4. **Message structure** - Verifies real model outputs maintain proper message structure
+5. **Agent integration** - Tests that conversation managers work with real Agent workflows
+
+These tests require API keys (`ANTHROPIC_API_KEY`) and make real API calls, so they should be run sparingly
+and may be skipped in CI environments without proper credentials.
+"""
+
+import os
+
+import pytest
+
+import strands
+from strands import Agent
+from strands.agent.conversation_manager import SummarizingConversationManager
+from strands.models.anthropic import AnthropicModel
+
+
+@pytest.fixture
+def model():
+    """Provide the live Anthropic model that backs the agent under test."""
+    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
+    client_settings = {"api_key": anthropic_key}
+    return AnthropicModel(
+        client_args=client_settings,
+        model_id="claude-3-haiku-20240307",  # Haiku keeps these live-API tests fast and cheap
+        max_tokens=1024,
+    )
+
+
+@pytest.fixture
+def summarization_model():
+    """Provide a second model instance for the dedicated summarization agent."""
+    # Separate instance from the main model fixture, capped at 512 output tokens.
+    model_options = {
+        "client_args": {"api_key": os.getenv("ANTHROPIC_API_KEY")},
+        "model_id": "claude-3-haiku-20240307",
+        "max_tokens": 512,
+    }
+    return AnthropicModel(**model_options)
+
+
+@pytest.fixture
+def tools():
+    """Provide deterministic tools (fixed time, canned weather, integer sum) for tool-pair tests."""
+
+    @strands.tool
+    def get_current_time() -> str:
+        """Get the current time."""
+        return "2024-01-15 14:30:00"
+
+    @strands.tool
+    def get_weather(city: str) -> str:
+        """Get weather information for a city."""
+        return f"The weather in {city} is sunny and 72°F"
+
+    @strands.tool
+    def calculate_sum(a: int, b: int) -> int:
+        """Calculate the sum of two numbers."""
+        return a + b
+
+    return [get_current_time, get_weather, calculate_sum]
+
+
+@pytest.mark.skipif("ANTHROPIC_API_KEY" not in os.environ, reason="ANTHROPIC_API_KEY environment variable missing")
+def test_summarization_with_context_overflow(model):
+    """Check that reduce_context replaces the oldest messages with a real model-generated summary."""
+    # Canned assistant replies: the history is pre-built so only summarization and the final turn hit the API
+    greeting_response = """
+    Hello! I'm here to help you test your conversation manager. What specifically would you like 
+    me to do as part of this test? I can respond to different types of prompts, maintain context 
+    throughout our conversation, or demonstrate other capabilities of the AI assistant. Just let 
+    me know what aspects you'd like to evaluate.
+    """.strip()
+
+    computer_history_response = """
+    # History of Computers
+
+    The history of computers spans many centuries, evolving from simple calculating tools to 
+    the powerful machines we use today.
+
+    ## Early Computing Devices
+    - **Ancient abacus** (3000 BCE): One of the earliest computing devices used for arithmetic calculations
+    - **Pascaline** (1642): Mechanical calculator invented by Blaise Pascal
+    - **Difference Engine** (1822): Designed by Charles Babbage to compute polynomial functions
+    - **Analytical Engine**: Babbage's more ambitious design, considered the first general-purpose computer concept
+    - **Hollerith's Tabulating Machine** (1890s): Used punch cards to process data for the US Census
+
+    ## Early Electronic Computers
+    - **ENIAC** (1945): First general-purpose electronic computer, weighed 30 tons
+    - **EDVAC** (1949): Introduced the stored program concept
+    - **UNIVAC I** (1951): First commercial computer in the United States
+    """.strip()
+
+    first_computers_response = """
+    # The First Computers
+
+    Early computers were dramatically different from today's machines in almost every aspect:
+
+    ## Physical Characteristics
+    - **Enormous size**: Room-filling or even building-filling machines
+    - **ENIAC** (1945) weighed about 30 tons, occupied 1,800 square feet
+    - Consisted of large metal frames or cabinets filled with components
+    - Required special cooling systems due to excessive heat generation
+
+    ## Technology and Components
+    - **Vacuum tubes**: Thousands of fragile glass tubes served as switches and amplifiers
+    - ENIAC contained over 17,000 vacuum tubes
+    - Generated tremendous heat and frequently failed
+    - **Memory**: Limited storage using delay lines, cathode ray tubes, or magnetic drums
+    """.strip()
+
+    messages = [
+        {"role": "user", "content": [{"text": "Hello, I'm testing a conversation manager."}]},
+        {"role": "assistant", "content": [{"text": greeting_response}]},
+        {"role": "user", "content": [{"text": "Can you tell me about the history of computers?"}]},
+        {"role": "assistant", "content": [{"text": computer_history_response}]},
+        {"role": "user", "content": [{"text": "What were the first computers like?"}]},
+        {"role": "assistant", "content": [{"text": first_computers_response}]},
+    ]
+
+    # Create agent with very aggressive summarization settings and pre-built conversation
+    agent = Agent(
+        model=model,
+        conversation_manager=SummarizingConversationManager(
+            summary_ratio=0.5,  # Summarize 50% of messages
+            preserve_recent_messages=2,  # Keep only 2 recent messages
+        ),
+        load_tools_from_directory=False,
+        messages=messages,
+    )
+
+    # Should have the pre-built conversation history
+    initial_message_count = len(agent.messages)
+    assert initial_message_count == 6  # 3 user + 3 assistant messages
+
+    # Store the last 2 messages before summarization to verify they're preserved
+    messages_before_summary = agent.messages[-2:].copy()
+
+    # Now manually trigger context reduction to test summarization
+    agent.conversation_manager.reduce_context(agent)
+
+    # Verify summarization occurred
+    assert len(agent.messages) < initial_message_count
+    # Should have: 1 summary + remaining messages
+    # With 6 messages, summary_ratio=0.5, preserve_recent_messages=2:
+    # messages_to_summarize = min(6 * 0.5, 6 - 2) = min(3, 4) = 3
+    # So we summarize 3 messages, leaving 3 remaining + 1 summary = 4 total
+    expected_total_messages = 4
+    assert len(agent.messages) == expected_total_messages
+
+    # First message should be the summary (assistant message)
+    summary_message = agent.messages[0]
+    assert summary_message["role"] == "assistant"
+    assert len(summary_message["content"]) > 0
+
+    # Verify the summary contains actual text content
+    summary_content = None
+    for content_block in summary_message["content"]:
+        if "text" in content_block:
+            summary_content = content_block["text"]
+            break
+
+    assert summary_content is not None
+    assert len(summary_content) > 50  # Should be a substantial summary
+
+    # Recent messages should be preserved - verify they're exactly the same
+    recent_messages = agent.messages[-2:]  # Last 2 messages should be preserved
+    assert len(recent_messages) == 2
+    assert recent_messages == messages_before_summary, "The last 2 messages should be preserved exactly as they were"
+
+    # Agent should still be functional after summarization
+    post_summary_result = agent("That's very interesting, thank you!")
+    assert post_summary_result.message["role"] == "assistant"
+
+
+@pytest.mark.skipif("ANTHROPIC_API_KEY" not in os.environ, reason="ANTHROPIC_API_KEY environment variable missing")
+def test_tool_preservation_during_summarization(model, tools):
+    """Check that toolUse/toolResult counts stay balanced after summarization and tools still work."""
+    agent = Agent(
+        model=model,
+        tools=tools,
+        conversation_manager=SummarizingConversationManager(
+            summary_ratio=0.6,  # Aggressive summarization
+            preserve_recent_messages=3,
+        ),
+        load_tools_from_directory=False,
+    )
+
+    # Canned conversation with tool usage so the history is built without API calls
+    greeting_text = """
+    Hello! I'd be happy to help you with calculations. I have access to tools that can 
+    help with math, time, and weather information. What would you like me to calculate for you?
+    """.strip()
+
+    weather_response = "The weather in San Francisco is sunny and 72°F. Perfect weather for being outside!"
+
+    tool_conversation_data = [
+        # Initial greeting exchange
+        {"role": "user", "content": [{"text": "Hello, can you help me with some calculations?"}]},
+        {"role": "assistant", "content": [{"text": greeting_text}]},
+        # Time query with tool use/result pair
+        {"role": "user", "content": [{"text": "What's the current time?"}]},
+        {
+            "role": "assistant",
+            "content": [{"toolUse": {"toolUseId": "time_001", "name": "get_current_time", "input": {}}}],
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "toolResult": {
+                        "toolUseId": "time_001",
+                        "content": [{"text": "2024-01-15 14:30:00"}],
+                        "status": "success",
+                    }
+                }
+            ],
+        },
+        {"role": "assistant", "content": [{"text": "The current time is 2024-01-15 14:30:00."}]},
+        # Math calculation with tool use/result pair
+        {"role": "user", "content": [{"text": "What's 25 + 37?"}]},
+        {
+            "role": "assistant",
+            "content": [{"toolUse": {"toolUseId": "calc_001", "name": "calculate_sum", "input": {"a": 25, "b": 37}}}],
+        },
+        {
+            "role": "user",
+            "content": [{"toolResult": {"toolUseId": "calc_001", "content": [{"text": "62"}], "status": "success"}}],
+        },
+        {"role": "assistant", "content": [{"text": "25 + 37 = 62"}]},
+        # Weather query with tool use/result pair
+        {"role": "user", "content": [{"text": "What's the weather like in San Francisco?"}]},
+        {
+            "role": "assistant",
+            "content": [
+                {"toolUse": {"toolUseId": "weather_001", "name": "get_weather", "input": {"city": "San Francisco"}}}
+            ],
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "toolResult": {
+                        "toolUseId": "weather_001",
+                        "content": [{"text": "The weather in San Francisco is sunny and 72°F"}],
+                        "status": "success",
+                    }
+                }
+            ],
+        },
+        {"role": "assistant", "content": [{"text": weather_response}]},
+    ]
+
+    # Load the canned history directly instead of generating it through the model
+    agent.messages.extend(tool_conversation_data)
+
+    # Force summarization
+    agent.conversation_manager.reduce_context(agent)
+
+    # Verify tool pairs are still balanced after summarization
+    post_summary_tool_use_count = 0
+    post_summary_tool_result_count = 0
+
+    for message in agent.messages:
+        for content in message.get("content", []):
+            if "toolUse" in content:
+                post_summary_tool_use_count += 1
+            if "toolResult" in content:
+                post_summary_tool_result_count += 1
+
+    # Tool uses and results should be balanced (no orphaned tools)
+    assert post_summary_tool_use_count == post_summary_tool_result_count, (
+        "Tool use and tool result counts should be balanced after summarization"
+    )
+
+    # Agent should still be able to use tools after summarization
+    agent("Calculate 15 + 28 for me.")
+
+    # Should have triggered the calculate_sum tool
+    found_calculation = False
+    for message in agent.messages[-2:]:  # Check recent messages
+        for content in message.get("content", []):
+            if "toolResult" in content and "43" in str(content):  # 15 + 28 = 43
+                found_calculation = True
+                break
+
+    assert found_calculation, "Tool should still work after summarization"
+
+
+@pytest.mark.skipif("ANTHROPIC_API_KEY" not in os.environ, reason="ANTHROPIC_API_KEY environment variable missing")
+def test_dedicated_summarization_agent(model, summarization_model):
+    """Check that summarization works when delegated to a dedicated summarization agent."""
+    # Create a dedicated summarization agent
+    summarization_agent = Agent(
+        model=summarization_model,
+        system_prompt="You are a conversation summarizer. Create concise, structured summaries.",
+        load_tools_from_directory=False,
+    )
+
+    # Create main agent with dedicated summarization agent
+    agent = Agent(
+        model=model,
+        conversation_manager=SummarizingConversationManager(
+            summary_ratio=0.5,
+            preserve_recent_messages=2,
+            summarization_agent=summarization_agent,
+        ),
+        load_tools_from_directory=False,
+    )
+
+    # Canned assistant replies for a space exploration conversation
+    space_intro_response = """
+    Space exploration has been one of humanity's greatest achievements, beginning with early 
+    satellite launches in the 1950s and progressing to human spaceflight, moon landings, and now 
+    commercial space ventures.
+    """.strip()
+
+    space_milestones_response = """
+    Key milestones include Sputnik 1 (1957), Yuri Gagarin's first human spaceflight (1961), 
+    the Apollo 11 moon landing (1969), the Space Shuttle program, and the International Space 
+    Station construction.
+    """.strip()
+
+    apollo_missions_response = """
+    The Apollo program was NASA's lunar exploration program from 1961-1975. Apollo 11 achieved 
+    the first moon landing in 1969 with Neil Armstrong and Buzz Aldrin, followed by five more 
+    successful lunar missions through Apollo 17.
+    """.strip()
+
+    spacex_response = """
+    SpaceX has revolutionized space travel with reusable rockets, reducing launch costs dramatically. 
+    They've achieved crew transportation to the ISS, satellite deployments, and are developing 
+    Starship for Mars missions.
+    """.strip()
+
+    conversation_pairs = [
+        ("I'm interested in learning about space exploration.", space_intro_response),
+        ("What were the key milestones in space exploration?", space_milestones_response),
+        ("Tell me about the Apollo missions.", apollo_missions_response),
+        ("What about modern space exploration with SpaceX?", spacex_response),
+    ]
+
+    # Build the history by hand so only the summarization step calls the model
+    for user_input, assistant_response in conversation_pairs:
+        agent.messages.append({"role": "user", "content": [{"text": user_input}]})
+        agent.messages.append({"role": "assistant", "content": [{"text": assistant_response}]})
+
+    # Force summarization
+    original_length = len(agent.messages)
+    agent.conversation_manager.reduce_context(agent)
+
+    # Verify summarization occurred
+    assert len(agent.messages) < original_length
+
+    # Get the summary message
+    summary_message = agent.messages[0]
+    assert summary_message["role"] == "assistant"
+
+    # Extract summary text
+    summary_text = None
+    for content in summary_message["content"]:
+        if "text" in content:
+            summary_text = content["text"]
+            break
+
+    assert summary_text
diff --git a/tests/strands/agent/test_summarizing_conversation_manager.py b/tests/strands/agent/test_summarizing_conversation_manager.py
new file mode 100644
index 000000000..9952203e7
--- /dev/null
+++ b/tests/strands/agent/test_summarizing_conversation_manager.py
@@ -0,0 +1,566 @@
+from typing import TYPE_CHECKING, cast
+from unittest.mock import Mock, patch
+
+import pytest
+
+from strands.agent.conversation_manager.summarizing_conversation_manager import SummarizingConversationManager
+from strands.types.content import Messages
+from strands.types.exceptions import ContextWindowOverflowException
+
+if TYPE_CHECKING:
+    from strands.agent.agent import Agent
+
+
+class MockAgent:
+    """Mock agent for testing summarization."""
+
+    def __init__(self, summary_response="This is a summary of the conversation."):
+        self.summary_response = summary_response
+        self.system_prompt = None
+        self.messages = []
+        self.model = Mock()
+        self.call_tracker = Mock()
+
+    def __call__(self, prompt):
+        """Mock agent call that returns a summary."""
+        self.call_tracker(prompt)
+        result = Mock()
+        result.message = {"role": "assistant", "content": [{"text": self.summary_response}]}
+        return result
+
+
+def create_mock_agent(summary_response="This is a summary of the conversation.") -> "Agent":
+    """Factory function that returns a properly typed MockAgent."""
+    return cast("Agent", MockAgent(summary_response))
+
+
+@pytest.fixture
+def mock_agent():
+    """Fixture for mock agent."""
+    return create_mock_agent()
+
+
+@pytest.fixture
+def summarizing_manager():
+    """Fixture for summarizing conversation manager with default settings."""
+    return SummarizingConversationManager(
+        summary_ratio=0.5,
+        preserve_recent_messages=2,
+    )
+
+
+def test_init_default_values():
+    """Test initialization with default values."""
+    manager = SummarizingConversationManager()
+
+    assert manager.summarization_agent is None
+    assert manager.summary_ratio == 0.3
+    assert manager.preserve_recent_messages == 10
+
+
+def test_init_clamps_summary_ratio():
+    """Test that summary_ratio is clamped to valid range."""
+    # Test lower bound
+    manager = SummarizingConversationManager(summary_ratio=0.05)
+    assert manager.summary_ratio == 0.1
+
+    # Test upper bound
+    manager = SummarizingConversationManager(summary_ratio=0.95)
+    assert manager.summary_ratio == 0.8
+
+
+def test_reduce_context_raises_when_no_agent():
+    """Test that reduce_context raises exception when agent has no messages."""
+    manager = SummarizingConversationManager()
+
+    # Create a mock agent with no messages
+    mock_agent = Mock()
+    empty_messages: Messages = []
+    mock_agent.messages = empty_messages
+
+    with pytest.raises(ContextWindowOverflowException, match="insufficient messages for summarization"):
+        manager.reduce_context(mock_agent)
+
+
+def test_reduce_context_with_summarization(summarizing_manager, mock_agent):
+    """Test reduce_context with summarization enabled."""
+    test_messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+        {"role": "user", "content": [{"text": "Message 2"}]},
+        {"role": "assistant", "content": [{"text": "Response 2"}]},
+        {"role": "user", "content": [{"text": "Message 3"}]},
+        {"role": "assistant", "content": [{"text": "Response 3"}]},
+    ]
+    mock_agent.messages = test_messages
+
+    summarizing_manager.reduce_context(mock_agent)
+
+    # Should have: 1 summary message + 2 preserved recent messages + remaining from summarization
+    assert len(mock_agent.messages) == 4
+
+    # First message should be the summary
+    assert mock_agent.messages[0]["role"] == "assistant"
+    first_content = mock_agent.messages[0]["content"][0]
+    assert "text" in first_content and "This is a summary of the conversation." in first_content["text"]
+
+    # Recent messages should be preserved
+    assert "Message 3" in str(mock_agent.messages[-2]["content"])
+    assert "Response 3" in str(mock_agent.messages[-1]["content"])
+
+
+def test_reduce_context_too_few_messages_raises_exception(summarizing_manager, mock_agent):
+    """Test that reduce_context raises exception when there are too few messages to summarize effectively."""
+    # Create a scenario where calculation results in 0 messages to summarize
+    manager = SummarizingConversationManager(
+        summary_ratio=0.1,  # Very small ratio
+        preserve_recent_messages=5,  # High preservation
+    )
+
+    insufficient_test_messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+        {"role": "user", "content": [{"text": "Message 2"}]},
+    ]
+    mock_agent.messages = insufficient_test_messages  # 3 messages, preserve_recent_messages=5, so nothing to summarize
+
+    with pytest.raises(ContextWindowOverflowException, match="insufficient messages for summarization"):
+        manager.reduce_context(mock_agent)
+
+
+def test_reduce_context_insufficient_messages_for_summarization(mock_agent):
+    """Test reduce_context when there aren't enough messages to summarize."""
+    manager = SummarizingConversationManager(
+        summary_ratio=0.5,
+        preserve_recent_messages=3,
+    )
+
+    insufficient_messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+        {"role": "user", "content": [{"text": "Message 2"}]},
+    ]
+    mock_agent.messages = insufficient_messages
+
+    # This should raise an exception since there aren't enough messages to summarize
+    with pytest.raises(ContextWindowOverflowException, match="insufficient messages for summarization"):
+        manager.reduce_context(mock_agent)
+
+
+def test_reduce_context_raises_on_summarization_failure():
+    """Test that reduce_context raises exception when summarization fails."""
+    # Create an agent that will fail
+    failing_agent = Mock()
+    failing_agent.side_effect = Exception("Agent failed")
+    failing_agent.system_prompt = None
+    failing_agent_messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+        {"role": "user", "content": [{"text": "Message 2"}]},
+        {"role": "assistant", "content": [{"text": "Response 2"}]},
+    ]
+    failing_agent.messages = failing_agent_messages
+
+    manager = SummarizingConversationManager(
+        summary_ratio=0.5,
+        preserve_recent_messages=1,
+    )
+
+    with patch("strands.agent.conversation_manager.summarizing_conversation_manager.logger") as mock_logger:
+        with pytest.raises(Exception, match="Agent failed"):
+            manager.reduce_context(failing_agent)
+
+        # Should log the error
+        mock_logger.error.assert_called_once()
+
+
+def test_generate_summary(summarizing_manager, mock_agent):
+    """Test the _generate_summary method."""
+    test_messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi there"}]},
+    ]
+
+    summary = summarizing_manager._generate_summary(test_messages, mock_agent)
+
+    summary_content = summary["content"][0]
+    assert "text" in summary_content and summary_content["text"] == "This is a summary of the conversation."
+
+
+def test_generate_summary_with_tool_content(summarizing_manager, mock_agent):
+    """Test summary generation with tool use and results."""
+    tool_messages: Messages = [
+        {"role": "user", "content": [{"text": "Use a tool"}]},
+        {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "test_tool", "input": {}}}]},
+        {
+            "role": "user",
+            "content": [
+                {"toolResult": {"toolUseId": "123", "content": [{"text": "Tool output"}], "status": "success"}}
+            ],
+        },
+    ]
+
+    summary = summarizing_manager._generate_summary(tool_messages, mock_agent)
+
+    summary_content = summary["content"][0]
+    assert "text" in summary_content and summary_content["text"] == "This is a summary of the conversation."
+
+
+def test_generate_summary_raises_on_agent_failure():
+    """Test that _generate_summary raises exception when agent fails."""
+    failing_agent = Mock()
+    failing_agent.side_effect = Exception("Agent failed")
+    failing_agent.system_prompt = None
+    empty_failing_messages: Messages = []
+    failing_agent.messages = empty_failing_messages
+
+    manager = SummarizingConversationManager()
+
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi there"}]},
+    ]
+
+    # Should raise the exception from the agent
+    with pytest.raises(Exception, match="Agent failed"):
+        manager._generate_summary(messages, failing_agent)
+
+
+def test_adjust_split_point_for_tool_pairs(summarizing_manager):
+    """Test that the split point is adjusted to avoid breaking ToolUse/ToolResult pairs."""
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "test_tool", "input": {}}}]},
+        {
+            "role": "user",
+            "content": [
+                {
+                    "toolResult": {
+                        "toolUseId": "123",
+                        "content": [{"text": "Tool output"}],
+                        "status": "success",
+                    }
+                }
+            ],
+        },
+        {"role": "assistant", "content": [{"text": "Response after tool"}]},
+    ]
+
+    # If we try to split at message 2 (the ToolResult), it should move forward to message 3
+    adjusted_split = summarizing_manager._adjust_split_point_for_tool_pairs(messages, 2)
+    assert adjusted_split == 3  # Should move to after the ToolResult
+
+    # If we try to split at message 3, it should be fine (no tool issues)
+    adjusted_split = summarizing_manager._adjust_split_point_for_tool_pairs(messages, 3)
+    assert adjusted_split == 3
+
+    # If we try to split at message 1 (toolUse with following toolResult), it should be valid
+    adjusted_split = summarizing_manager._adjust_split_point_for_tool_pairs(messages, 1)
+    assert adjusted_split == 1  # Should be valid because toolResult follows
+
+
+def test_apply_management_no_op(summarizing_manager, mock_agent):
+    """Test apply_management does not modify messages (no-op behavior)."""
+    apply_test_messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi"}]},
+        {"role": "user", "content": [{"text": "More messages"}]},
+        {"role": "assistant", "content": [{"text": "Even more"}]},
+    ]
+    mock_agent.messages = apply_test_messages
+    original_messages = mock_agent.messages.copy()
+
+    summarizing_manager.apply_management(mock_agent)
+
+    # Should never modify messages - summarization only happens on context overflow
+    assert mock_agent.messages == original_messages
+
+
+def test_init_with_custom_parameters():
+    """Test initialization with custom parameters."""
+    mock_agent = create_mock_agent()
+
+    manager = SummarizingConversationManager(
+        summary_ratio=0.4,
+        preserve_recent_messages=5,
+        summarization_agent=mock_agent,
+    )
+    assert manager.summary_ratio == 0.4
+    assert manager.preserve_recent_messages == 5
+    assert manager.summarization_agent == mock_agent
+    assert manager.summarization_system_prompt is None
+
+
+def test_init_with_both_agent_and_prompt_raises_error():
+    """Test that providing both agent and system prompt raises ValueError."""
+    mock_agent = create_mock_agent()
+    custom_prompt = "Custom summarization prompt"
+
+    with pytest.raises(ValueError, match="Cannot provide both summarization_agent and summarization_system_prompt"):
+        SummarizingConversationManager(
+            summarization_agent=mock_agent,
+            summarization_system_prompt=custom_prompt,
+        )
+
+
+def test_uses_summarization_agent_when_provided():
+    """Test that summarization_agent is used when provided."""
+    summary_agent = create_mock_agent("Custom summary from dedicated agent")
+    manager = SummarizingConversationManager(summarization_agent=summary_agent)
+
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi there"}]},
+    ]
+
+    parent_agent = create_mock_agent("Parent agent summary")
+    summary = manager._generate_summary(messages, parent_agent)
+
+    # Should use the dedicated summarization agent, not the parent agent
+    summary_content = summary["content"][0]
+    assert "text" in summary_content and summary_content["text"] == "Custom summary from dedicated agent"
+
+    # Assert that the summarization agent was called
+    summary_agent.call_tracker.assert_called_once()
+
+
+def test_uses_parent_agent_when_no_summarization_agent():
+    """Test that parent agent is used when no summarization_agent is provided."""
+    manager = SummarizingConversationManager()
+
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi there"}]},
+    ]
+
+    parent_agent = create_mock_agent("Parent agent summary")
+    summary = manager._generate_summary(messages, parent_agent)
+
+    # Should use the parent agent
+    summary_content = summary["content"][0]
+    assert "text" in summary_content and summary_content["text"] == "Parent agent summary"
+
+    # Assert that the parent agent was called
+    parent_agent.call_tracker.assert_called_once()
+
+
+def test_uses_custom_system_prompt():
+    """Test that the agent's system prompt is restored after summarizing with a custom prompt."""
+    custom_prompt = "Custom system prompt for summarization"
+    manager = SummarizingConversationManager(summarization_system_prompt=custom_prompt)
+    mock_agent = create_mock_agent()
+
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi there"}]},
+    ]
+
+    # Remember the agent's system prompt so we can check it is restored afterwards
+    original_prompt = mock_agent.system_prompt
+    manager._generate_summary(messages, mock_agent)
+
+    # The agent's system prompt should be restored after summarization
+    assert mock_agent.system_prompt == original_prompt
+
+
+def test_agent_state_restoration():
+    """Test that agent state is properly restored after summarization."""
+    manager = SummarizingConversationManager()
+    mock_agent = create_mock_agent()
+
+    # Set initial state
+    original_system_prompt = "Original system prompt"
+    original_messages: Messages = [{"role": "user", "content": [{"text": "Original message"}]}]
+    mock_agent.system_prompt = original_system_prompt
+    mock_agent.messages = original_messages.copy()
+
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi there"}]},
+    ]
+
+    manager._generate_summary(messages, mock_agent)
+
+    # State should be restored
+    assert mock_agent.system_prompt == original_system_prompt
+    assert mock_agent.messages == original_messages
+
+
+def test_agent_state_restoration_on_exception():
+    """Test that agent state is restored even when summarization fails."""
+    manager = SummarizingConversationManager()
+
+    # Create an agent that fails during summarization
+    mock_agent = Mock()
+    mock_agent.system_prompt = "Original prompt"
+    agent_messages: Messages = [{"role": "user", "content": [{"text": "Original"}]}]
+    mock_agent.messages = agent_messages
+    mock_agent.side_effect = Exception("Summarization failed")
+
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Hello"}]},
+        {"role": "assistant", "content": [{"text": "Hi there"}]},
+    ]
+
+    # Should restore state even on exception
+    with pytest.raises(Exception, match="Summarization failed"):
+        manager._generate_summary(messages, mock_agent)
+
+    # State should still be restored
+    assert mock_agent.system_prompt == "Original prompt"
+
+
+def test_reduce_context_tool_pair_adjustment_works_with_forward_search():
+    """Test that tool pair adjustment works correctly with the forward-search logic."""
+    manager = SummarizingConversationManager(
+        summary_ratio=0.5,
+        preserve_recent_messages=1,
+    )
+
+    mock_agent = create_mock_agent()
+    # Create messages where the initial split point lands on a toolResult and must move forward
+    tool_pair_messages: Messages = [
+        {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "test_tool", "input": {}}}]},
+        {
+            "role": "user",
+            "content": [
+                {"toolResult": {"toolUseId": "123", "content": [{"text": "Tool output"}], "status": "success"}}
+            ],
+        },
+        {"role": "user", "content": [{"text": "Latest message"}]},
+    ]
+    mock_agent.messages = tool_pair_messages
+
+    # With 3 messages, preserve_recent_messages=1, summary_ratio=0.5:
+    # messages_to_summarize_count = (3 - 1) * 0.5 = 1
+    # But the split point (index 1) is the toolResult, so adjustment moves it forward to index 2
+    manager.reduce_context(mock_agent)
+    # Should have summary + remaining messages
+    assert len(mock_agent.messages) == 2
+
+    # First message should be the summary
+    assert mock_agent.messages[0]["role"] == "assistant"
+    summary_content = mock_agent.messages[0]["content"][0]
+    assert "text" in summary_content and "This is a summary of the conversation." in summary_content["text"]
+
+    # Last message should be the preserved recent message
+    assert mock_agent.messages[1]["role"] == "user"
+    assert mock_agent.messages[1]["content"][0]["text"] == "Latest message"
+
+
+def test_adjust_split_point_exceeds_message_length(summarizing_manager):
+    """Test that split point exceeding message array length raises exception."""
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+    ]
+
+    # Try to split at point 5 when there are only 2 messages
+    with pytest.raises(ContextWindowOverflowException, match="Split point exceeds message array length"):
+        summarizing_manager._adjust_split_point_for_tool_pairs(messages, 5)
+
+
+def test_adjust_split_point_equals_message_length(summarizing_manager):
+    """Test that split point equal to message array length returns unchanged."""
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+    ]
+
+    # Split point equals message length (2) - should return unchanged
+    result = summarizing_manager._adjust_split_point_for_tool_pairs(messages, 2)
+    assert result == 2
+
+
+def test_adjust_split_point_no_tool_result_at_split(summarizing_manager):
+    """Test split point that doesn't contain tool result, ensuring we reach return split_point."""
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+        {"role": "user", "content": [{"text": "Message 2"}]},
+    ]
+
+    # Split point message is not a tool result, so it should directly return split_point
+    result = summarizing_manager._adjust_split_point_for_tool_pairs(messages, 1)
+    assert result == 1
+
+
+def test_adjust_split_point_tool_result_without_tool_use(summarizing_manager):
+    """Test that having tool results without tool uses raises exception."""
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {
+            "role": "user",
+            "content": [
+                {"toolResult": {"toolUseId": "123", "content": [{"text": "Tool output"}], "status": "success"}}
+            ],
+        },
+    ]
+
+    # Has tool result but no tool use - invalid state
+    with pytest.raises(ContextWindowOverflowException, match="Unable to trim conversation context!"):
+        summarizing_manager._adjust_split_point_for_tool_pairs(messages, 1)
+
+
+def test_adjust_split_point_tool_result_moves_to_end(summarizing_manager):
+    """Test tool result at split point moves forward to valid position at end."""
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+        {
+            "role": "user",
+            "content": [
+                {"toolResult": {"toolUseId": "123", "content": [{"text": "Tool output"}], "status": "success"}}
+            ],
+        },
+        {"role": "assistant", "content": [{"toolUse": {"toolUseId": "456", "name": "different_tool", "input": {}}}]},
+    ]
+
+    # Split at message 2 (toolResult) - will move forward to message 3 (toolUse at end is valid)
+    result = summarizing_manager._adjust_split_point_for_tool_pairs(messages, 2)
+    assert result == 3
+
+
+def test_adjust_split_point_tool_result_no_forward_position(summarizing_manager):
+    """Test tool result at split point where forward search finds no valid position."""
+    messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"toolUse": {"toolUseId": "123", "name": "test_tool", "input": {}}}]},
+        {"role": "user", "content": [{"text": "Message between"}]},
+        {
+            "role": "user",
+            "content": [
+                {"toolResult": {"toolUseId": "123", "content": [{"text": "Tool output"}], "status": "success"}}
+            ],
+        },
+    ]
+
+    # Split at message 3 (toolResult) - will try to move forward but no valid position exists
+    with pytest.raises(ContextWindowOverflowException, match="Unable to trim conversation context!"):
+        summarizing_manager._adjust_split_point_for_tool_pairs(messages, 3)
+
+
+def test_reduce_context_adjustment_returns_zero():
+    """Test that tool pair adjustment can return zero, triggering the insufficient-messages check."""
+    manager = SummarizingConversationManager(
+        summary_ratio=0.5,
+        preserve_recent_messages=1,
+    )
+
+    # Mock the adjustment method to return 0
+    def mock_adjust(messages, split_point):
+        return 0  # This should trigger the <= 0 check in reduce_context
+
+    manager._adjust_split_point_for_tool_pairs = mock_adjust
+
+    mock_agent = Mock()
+    simple_messages: Messages = [
+        {"role": "user", "content": [{"text": "Message 1"}]},
+        {"role": "assistant", "content": [{"text": "Response 1"}]},
+        {"role": "user", "content": [{"text": "Message 2"}]},
+    ]
+    mock_agent.messages = simple_messages
+
+    # The adjustment method will return 0, so reduce_context should raise instead of summarizing
+    with pytest.raises(ContextWindowOverflowException, match="insufficient messages for summarization"):
+        manager.reduce_context(mock_agent)