diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py index d1f59d6ffd..d2be8cc448 100644 --- a/openhands-sdk/openhands/sdk/agent/agent.py +++ b/openhands-sdk/openhands/sdk/agent/agent.py @@ -504,9 +504,10 @@ def step( "skipping hook check for legacy conversation state." ) - # Prepare LLM messages using the utility function + # Prepare LLM messages from the cached, incrementally-maintained view. + # See https://github.com/OpenHands/software-agent-sdk/issues/3053. _messages_or_condensation = prepare_llm_messages( - state.events, condenser=self.condenser, llm=self.llm + state.view, condenser=self.condenser, llm=self.llm ) # Process condensation event before agent sampels another action @@ -554,6 +555,13 @@ def step( "triggering condensation retry with condensed history: " f"{e}" ) + # The incremental view we just sent may itself be the source of + # the malformed history (e.g., a manipulation-indices bug that + # let a property slip). Re-derive it from the event log with + # full `enforce_properties` before kicking off the condensation + # retry, so the condenser operates on a clean, enforced view. + # See https://github.com/OpenHands/software-agent-sdk/issues/3053. + state.rebuild_view() on_event(CondensationRequest()) return logger.warning( diff --git a/openhands-sdk/openhands/sdk/agent/utils.py b/openhands-sdk/openhands/sdk/agent/utils.py index 1106835b62..8aac573527 100644 --- a/openhands-sdk/openhands/sdk/agent/utils.py +++ b/openhands-sdk/openhands/sdk/agent/utils.py @@ -8,7 +8,7 @@ import subprocess import textwrap import types -from collections.abc import Collection, Sequence +from collections.abc import Collection from typing import ( Annotated, Any, @@ -21,7 +21,7 @@ from openhands.sdk.context.condenser.base import CondenserBase from openhands.sdk.context.view import View from openhands.sdk.conversation.types import ConversationTokenCallbackType -from openhands.sdk.event.base import Event, LLMConvertibleEvent +from openhands.sdk.event.base import LLMConvertibleEvent from openhands.sdk.event.condenser import Condensation from openhands.sdk.llm import LLM, LLMResponse, Message from openhands.sdk.tool import Action, ToolDefinition @@ -444,7 +444,7 @@ def normalize_tool_call( @overload def prepare_llm_messages( - events: Sequence[Event], + view: View, condenser: None = None, additional_messages: list[Message] | None = None, llm: LLM | None = None, @@ -453,7 +453,7 @@ def prepare_llm_messages( @overload def prepare_llm_messages( - events: Sequence[Event], + view: View, condenser: CondenserBase, additional_messages: list[Message] | None = None, llm: LLM | None = None, @@ -461,19 +461,25 @@ def prepare_llm_messages( def prepare_llm_messages( - events: Sequence[Event], + view: View, condenser: CondenserBase | None = None, additional_messages: list[Message] | None = None, llm: LLM | None = None, ) -> list[Message] | Condensation: - """Prepare LLM messages from conversation context. + """Prepare LLM messages from a conversation view. This utility function extracts the common logic for preparing conversation context that is shared between agent.step() and ask_agent() methods. It handles condensation internally and calls the callback when needed. + Callers should pass the cached `ConversationState.view`, which is + maintained incrementally as events are appended. This avoids paying the + O(n) `View.from_events` (with `enforce_properties`) cost on every step. + See https://github.com/OpenHands/software-agent-sdk/issues/3053. + Args: - events: Sequence of events to prepare messages from + view: A `View` of the conversation history. The view is treated as + read-only — see `CondenserBase.condense` for the same contract. condenser: Optional condenser for handling context window limits additional_messages: Optional additional messages to append llm: Optional LLM instance from the agent, passed to condenser for @@ -482,12 +488,7 @@ def prepare_llm_messages( Returns: List of messages ready for LLM completion, or a Condensation event if condensation is needed - - Raises: - RuntimeError: If condensation is needed but no callback is provided """ - - view = View.from_events(events) llm_convertible_events: list[LLMConvertibleEvent] = view.events # If a condenser is registered, we need to give it an diff --git a/openhands-sdk/openhands/sdk/context/condenser/base.py b/openhands-sdk/openhands/sdk/context/condenser/base.py index 4b80ec8b85..ca16503fce 100644 --- a/openhands-sdk/openhands/sdk/context/condenser/base.py +++ b/openhands-sdk/openhands/sdk/context/condenser/base.py @@ -39,6 +39,12 @@ def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensat Args: view: A view of the history containing all events that should be condensed. + **Implementations must treat this view as read-only.** The view may be + a cached projection owned by `ConversationState` + (see https://github.com/OpenHands/software-agent-sdk/issues/3053), and + mutating it in place will corrupt that cache and silently desynchronize + the conversation from its event log. Return a new `View` (e.g. + `View(events=view.events[k:])`) or a `Condensation` event instead. agent_llm: LLM instance used by the agent. Condensers use this for token counting purposes. Defaults to None. diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 0ab9dc1cb9..515e5e1908 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -198,7 +198,11 @@ def _default_callback(e): # This callback runs while holding the conversation state's lock # (see BaseConversation.compose_callbacks usage inside `with self._state:` # regions), so updating state here is thread-safe. - self._state.events.append(e) + # Using `append_event` (instead of `state.events.append`) also + # incrementally updates the cached `state.view`, keeping the view + # in sync without paying the O(n) `View.from_events` cost on each + # step. + self._state.append_event(e) # Track user MessageEvent IDs here so hook callbacks (which may # synthesize or alter user messages) are captured in one place. if isinstance(e, MessageEvent) and e.source == "user": @@ -383,9 +387,13 @@ def fork( ) # Deep-copy events from source → fork so the source stays - # immutable. + # immutable. Use `append_event` so the fork's cached view is + # built up incrementally alongside the event log, then call + # `rebuild_view` once at the end to run a full enforcement pass + # over the freshly-populated log (same posture as cold load). for event in self._state.events: - fork_conv._state.events.append(event.model_copy(deep=True)) + fork_conv._state.append_event(event.model_copy(deep=True)) + fork_conv._state.rebuild_view() # Copy runtime state that accumulated during the source # conversation. activated_knowledge_skills is list[str] – strings @@ -1054,7 +1062,7 @@ def ask_agent(self, question: str) -> str: ) messages = prepare_llm_messages( - self.state.events, additional_messages=[user_message] + self.state.view, additional_messages=[user_message] ) # Get or create the specialized ask-agent LLM diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py index 7e137929f2..7b6d3f00e0 100644 --- a/openhands-sdk/openhands/sdk/conversation/state.py +++ b/openhands-sdk/openhands/sdk/conversation/state.py @@ -9,6 +9,7 @@ from pydantic import Field, PrivateAttr from openhands.sdk.agent.base import AgentBase +from openhands.sdk.context.view import View from openhands.sdk.conversation.conversation_stats import ConversationStats from openhands.sdk.conversation.event_store import EventLog from openhands.sdk.conversation.fifo_lock import FIFOLock @@ -205,6 +206,12 @@ class ConversationState(OpenHandsModel): # ===== Private attrs (NOT Fields) ===== _fs: FileStore = PrivateAttr() # filestore for persistence _events: EventLog = PrivateAttr() # now the storage for events + # Cached, incrementally-maintained projection of `_events`. Derived state, + # so it is intentionally not a model field and never persisted. Updated in + # `append_event` on every new event and fully re-derived (with property + # enforcement) via `rebuild_view` on cold load, fork, or error recovery. + # See https://github.com/OpenHands/software-agent-sdk/issues/3053. + _view: View = PrivateAttr(default_factory=View) _cipher: Cipher | None = PrivateAttr(default=None) # cipher for secret encryption _autosave_enabled: bool = PrivateAttr( default=False @@ -225,6 +232,59 @@ class ConversationState(OpenHandsModel): def events(self) -> EventLog: return self._events + @property + def view(self) -> View: + """Cached, incrementally-maintained `View` of the conversation events. + + The view is updated in lockstep with `events` via `append_event`, so it + always reflects the current `_events` log without paying the O(n) cost + of `View.from_events` on every read. It is fully re-derived (with + property enforcement) only by `rebuild_view`, which is intended for + cold load, fork, and explicit error recovery — not the per-step hot + path. + + Callers must treat the returned view as read-only. Mutating it + breaks the cache invariant and will cause silent divergence from + `events`. + """ + return self._view + + def append_event(self, event: Event) -> None: + """Append an event to the conversation, updating the cached view. + + This is the only sanctioned write path for adding events to a + running conversation: it persists the event via `EventLog.append` + and incrementally updates the cached `View` so the two stay in + sync. Callers (`_default_callback`, fork, etc.) must hold the + conversation lock; see callers in `LocalConversation` for the + canonical pattern. + + The incremental view update is O(1) for the common + `LLMConvertibleEvent` case and O(view-size) only when a + `Condensation` event is applied. It does not run + `enforce_properties`; that fallback path runs only via + `rebuild_view`. + """ + self._events.append(event) + self._view.append_event(event) + + def rebuild_view(self) -> None: + """Re-derive the cached view from the full event log. + + Runs `View.from_events` over the persisted log, which applies all + view property enforcement. This is the fallback path described in + `ViewPropertyBase` and should be used only on: + + - Cold load (resuming a persisted `ConversationState`), since the + on-disk events may have been written by an older version or + could be corrupted. + - Fork creation, after deep-copying events from the source state. + - Explicit error recovery (e.g., when the LLM rejects the current + message history as malformed and we want to re-validate the + view before retrying with condensation). + """ + self._view = View.from_events(self._events) + @property def env_observation_persistence_dir(self) -> str | None: """Directory for persisting environment observation files.""" @@ -355,6 +415,13 @@ def create( state._events = EventLog(file_store, dir_path=EVENTS_DIR) state._cipher = cipher + # Build the cached view from the persisted event log. This is the + # one place where we pay the full `View.from_events` cost (which + # runs property enforcement) — events may have been written by an + # older version of the code or otherwise be in a bad state, so + # treating this as a cold-load checkpoint is appropriate. + state.rebuild_view() + # Verify compatibility (agent class + tools) agent.verify(state.agent, events=state._events) diff --git a/tests/agent_server/test_conversation_service.py b/tests/agent_server/test_conversation_service.py index 8fd3df7eac..a3748310db 100644 --- a/tests/agent_server/test_conversation_service.py +++ b/tests/agent_server/test_conversation_service.py @@ -234,7 +234,7 @@ async def test_second_service_does_not_resume_active_running_conversation(tmp_pa primary_state = await primary_event_service.get_state() running_action = _create_running_terminal_action() - primary_state.events.append(running_action) + primary_state.append_event(running_action) primary_state.execution_status = ConversationExecutionStatus.RUNNING async with ConversationService( @@ -243,7 +243,7 @@ async def test_second_service_does_not_resume_active_running_conversation(tmp_pa assert secondary._event_services is not None assert conversation_info.id not in secondary._event_services - primary_state.events.append( + primary_state.append_event( ObservationEvent( observation=TerminalObservation.from_text( "done", @@ -284,7 +284,7 @@ async def test_stale_owner_cannot_append_after_lease_takeover(tmp_path): primary_state = await primary_event_service.get_state() running_action = _create_running_terminal_action() - primary_state.events.append(running_action) + primary_state.append_event(running_action) primary_state.execution_status = ConversationExecutionStatus.RUNNING _expire_conversation_lease(conversations_dir, conversation_info.id) @@ -301,7 +301,7 @@ async def test_stale_owner_cannot_append_after_lease_takeover(tmp_path): ) with pytest.raises(ConversationOwnershipLostError): - primary_state.events.append( + primary_state.append_event( ObservationEvent( observation=TerminalObservation.from_text( "late result", diff --git a/tests/cross/test_agent_loading.py b/tests/cross/test_agent_loading.py index 2875369eab..16269f08d6 100644 --- a/tests/cross/test_agent_loading.py +++ b/tests/cross/test_agent_loading.py @@ -263,7 +263,7 @@ def test_conversation_fails_when_used_tool_is_missing(): ), llm_response_id="test-response-1", ) - conversation.state.events.append(action_event) + conversation.state.append_event(action_event) conversation_id = conversation.state.id del conversation diff --git a/tests/cross/test_stuck_detector.py b/tests/cross/test_stuck_detector.py index d268d77b4e..cea984f775 100644 --- a/tests/cross/test_stuck_detector.py +++ b/tests/cross/test_stuck_detector.py @@ -40,7 +40,7 @@ def test_history_too_short(): source="user", llm_message=Message(role="user", content=[TextContent(text="Hello")]), ) - state.events.append(user_message) + state.append_event(user_message) # Add a single action-observation pair action = ActionEvent( @@ -57,7 +57,7 @@ def test_history_too_short(): ), llm_response_id="response_1", ) - state.events.append(action) + state.append_event(action) observation = ObservationEvent( source="environment", @@ -70,7 +70,7 @@ def test_history_too_short(): tool_name="terminal", tool_call_id="call_1", ) - state.events.append(observation) + state.append_event(observation) # Should not be stuck with only one action-observation pair after user message assert stuck_detector.is_stuck() is False @@ -281,7 +281,7 @@ def test_repeating_action_observation_not_stuck_less_than_4_repeats(): source="user", llm_message=Message(role="user", content=[TextContent(text="Please run ls")]), ) - state.events.append(user_message) + state.append_event(user_message) # Add 3 identical action-observation pairs to trigger stuck detection for i in range(3): @@ -299,7 +299,7 @@ def test_repeating_action_observation_not_stuck_less_than_4_repeats(): ), llm_response_id=f"response_{i}", ) - state.events.append(action) + state.append_event(action) observation = ObservationEvent( source="environment", @@ -312,7 +312,7 @@ def test_repeating_action_observation_not_stuck_less_than_4_repeats(): tool_name="terminal", tool_call_id=f"call_{i}", ) - state.events.append(observation) + state.append_event(observation) # Should be stuck with 4 identical action-observation pairs assert stuck_detector.is_stuck() is False @@ -332,7 +332,7 @@ def test_repeating_action_observation_stuck(): source="user", llm_message=Message(role="user", content=[TextContent(text="Please run ls")]), ) - state.events.append(user_message) + state.append_event(user_message) # Add 4 identical action-observation pairs to trigger stuck detection for i in range(4): @@ -350,7 +350,7 @@ def test_repeating_action_observation_stuck(): ), llm_response_id=f"response_{i}", ) - state.events.append(action) + state.append_event(action) observation = ObservationEvent( source="environment", @@ -363,7 +363,7 @@ def test_repeating_action_observation_stuck(): tool_name="terminal", tool_call_id=f"call_{i}", ) - state.events.append(observation) + state.append_event(observation) # Should be stuck with 4 identical action-observation pairs assert stuck_detector.is_stuck() is True @@ -385,7 +385,7 @@ def test_repeating_action_error_stuck(): role="user", content=[TextContent(text="Please run the invalid command")] ), ) - state.events.append(user_message) + state.append_event(user_message) def create_action_and_error(i): action = ActionEvent( @@ -413,16 +413,16 @@ def create_action_and_error(i): # Add 2 identical actions that result in errors for i in range(2): action, error = create_action_and_error(i) - state.events.append(action) - state.events.append(error) + state.append_event(action) + state.append_event(error) # Should not stuck with 2 identical action-error pairs assert stuck_detector.is_stuck() is False # Add 1 more identical action-error pair to trigger stuck detection action, error = create_action_and_error(2) - state.events.append(action) - state.events.append(error) + state.append_event(action) + state.append_event(error) # Should be stuck with 3 identical action-error pairs assert stuck_detector.is_stuck() is True @@ -442,7 +442,7 @@ def test_agent_monologue_stuck(): source="user", llm_message=Message(role="user", content=[TextContent(text="Hello")]), ) - state.events.append(user_message) + state.append_event(user_message) # Add 3 consecutive agent messages (monologue) for i in range(3): @@ -452,7 +452,7 @@ def test_agent_monologue_stuck(): role="assistant", content=[TextContent(text=f"I'm thinking... {i}")] ), ) - state.events.append(agent_message) + state.append_event(agent_message) # Should be stuck due to agent monologue assert stuck_detector.is_stuck() is True @@ -474,7 +474,7 @@ def test_not_stuck_with_different_actions(): role="user", content=[TextContent(text="Please run different commands")] ), ) - state.events.append(user_message) + state.append_event(user_message) # Add different actions commands = ["ls", "pwd", "whoami", "date"] @@ -493,7 +493,7 @@ def test_not_stuck_with_different_actions(): ), llm_response_id=f"response_{i}", ) - state.events.append(action) + state.append_event(action) observation = ObservationEvent( source="environment", @@ -506,7 +506,7 @@ def test_not_stuck_with_different_actions(): tool_name="terminal", tool_call_id=f"call_{i}", ) - state.events.append(observation) + state.append_event(observation) # Should not be stuck with different actions assert stuck_detector.is_stuck() is False @@ -526,7 +526,7 @@ def test_reset_after_user_message(): source="user", llm_message=Message(role="user", content=[TextContent(text="Please run ls")]), ) - state.events.append(user_message) + state.append_event(user_message) # Add 4 identical action-observation pairs to trigger stuck detection for i in range(4): @@ -544,7 +544,7 @@ def test_reset_after_user_message(): ), llm_response_id=f"response_{i}", ) - state.events.append(action) + state.append_event(action) observation = ObservationEvent( source="environment", @@ -557,7 +557,7 @@ def test_reset_after_user_message(): tool_name="terminal", tool_call_id=f"call_{i}", ) - state.events.append(observation) + state.append_event(observation) # Should be stuck assert stuck_detector.is_stuck() is True @@ -569,7 +569,7 @@ def test_reset_after_user_message(): role="user", content=[TextContent(text="Try something else")] ), ) - state.events.append(new_user_message) + state.append_event(new_user_message) # Should not be stuck after new user message (history is reset) assert stuck_detector.is_stuck() is False @@ -589,7 +589,7 @@ def test_reset_after_user_message(): ), llm_response_id="response_new", ) - state.events.append(action) + state.append_event(action) observation = ObservationEvent( source="environment", @@ -600,7 +600,7 @@ def test_reset_after_user_message(): tool_name="terminal", tool_call_id="call_new", ) - state.events.append(observation) + state.append_event(observation) # Still not stuck with just one action after user message assert stuck_detector.is_stuck() is False diff --git a/tests/cross/test_stuck_detector_config.py b/tests/cross/test_stuck_detector_config.py index 47727de4fc..1717f934b8 100644 --- a/tests/cross/test_stuck_detector_config.py +++ b/tests/cross/test_stuck_detector_config.py @@ -54,8 +54,8 @@ def create_action_obs(): # Add 4 pairs (would trigger default threshold of 4) for _ in range(4): action, observation = create_action_obs() - conv._state.events.append(action) - conv._state.events.append(observation) + conv._state.append_event(action) + conv._state.append_event(observation) # Should NOT be stuck with threshold=6 assert conv._stuck_detector is not None @@ -64,8 +64,8 @@ def create_action_obs(): # Add 2 more pairs to reach threshold of 6 for _ in range(2): action, observation = create_action_obs() - conv._state.events.append(action) - conv._state.events.append(observation) + conv._state.append_event(action) + conv._state.append_event(observation) # Now should be stuck assert conv._stuck_detector.is_stuck() diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 4d19773e85..2cabe4eccc 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -836,14 +836,14 @@ def test_step_wires_on_activity(self, tmp_path): state = _make_state(tmp_path) # Wire up a user message - state.events.append( + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="sys"), tools=[], ) ) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text="test")]), @@ -892,14 +892,14 @@ class TestACPAgentStep: def _make_conversation_with_message(self, tmp_path, text="Hello"): """Create a mock conversation with a user message.""" state = _make_state(tmp_path) - state.events.append( + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="ACP-managed agent"), tools=[], ) ) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text=text)]), @@ -969,7 +969,7 @@ def test_step_sends_skill_catalog_to_acp_server(self, tmp_path): ) ) state = _make_state(tmp_path) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message( @@ -1024,7 +1024,7 @@ def test_step_sends_legacy_repo_context_to_acp_server(self, tmp_path): ) ) state = _make_state(tmp_path) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message( @@ -1078,7 +1078,7 @@ def test_step_sends_triggered_skill_content_to_acp_server(self, tmp_path): ) ) state = _make_state(tmp_path) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message( @@ -1118,7 +1118,7 @@ def test_step_does_not_re_inject_suffix_on_second_turn(self, tmp_path): ) ) state = _make_state(tmp_path) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text="Turn 2.")]), @@ -1146,7 +1146,7 @@ def test_step_suffix_install_state_transitions_to_installed(self, tmp_path): ) ) state = _make_state(tmp_path) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text="First.")]), @@ -1426,14 +1426,14 @@ class TestACPAgentTelemetry: def _make_conversation_with_message(self, tmp_path, text="Hello"): """Create a mock conversation with a user message.""" state = _make_state(tmp_path) - state.events.append( + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="ACP-managed agent"), tools=[], ) ) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text=text)]), @@ -2133,14 +2133,14 @@ def test_retry_cancels_pending_events_before_reset(self, tmp_path): agent = _make_agent() state = _make_state(tmp_path) - state.events.append( + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="sys"), tools=[], ) ) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text="go")]), @@ -2227,14 +2227,14 @@ class TestACPToolCallEmission: def _make_conversation_with_message(self, tmp_path, text="Hello"): """Create a mock conversation with a user message.""" state = _make_state(tmp_path) - state.events.append( + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="ACP-managed agent"), tools=[], ) ) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text=text)]), @@ -2816,14 +2816,14 @@ class TestACPPromptRetry: def _make_conversation_with_message(self, tmp_path, text="Hello"): """Create a mock conversation with a user message.""" state = _make_state(tmp_path) - state.events.append( + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="ACP-managed agent"), tools=[], ) ) - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text=text)]), diff --git a/tests/sdk/agent/test_agent_context_window_condensation.py b/tests/sdk/agent/test_agent_context_window_condensation.py index 28beda25d1..6bff123096 100644 --- a/tests/sdk/agent/test_agent_context_window_condensation.py +++ b/tests/sdk/agent/test_agent_context_window_condensation.py @@ -118,6 +118,39 @@ def on_event(e): ) +def test_agent_rebuilds_view_on_malformed_history_recovery(monkeypatch): + """The malformed-history recovery path must re-derive the cached view. + + If the incremental view itself is what produced the malformed history + (e.g., a manipulation-indices bug let a property slip), the condensation + retry needs a freshly enforced view to work from. See #3053. + """ + llm = MalformedHistoryRaisingLLM() + agent = Agent(llm=llm, tools=[], condenser=HandlesRequestsCondenser()) + convo = Conversation(agent=agent) + + convo._ensure_agent_ready() + + call_count = 0 + original_rebuild = type(convo.state).rebuild_view + + def counting_rebuild(self): + nonlocal call_count + call_count += 1 + return original_rebuild(self) + + monkeypatch.setattr(type(convo.state), "rebuild_view", counting_rebuild) + + seen = [] + agent.step(convo, on_event=seen.append) + + assert any(isinstance(e, CondensationRequest) for e in seen) + assert call_count == 1, ( + f"rebuild_view should be called exactly once on malformed-history " + f"recovery, got {call_count}" + ) + + @pytest.mark.parametrize("force_responses", [True, False]) def test_agent_raises_ctx_exceeded_when_no_condenser(force_responses: bool): llm = RaisingLLM(force_responses=force_responses) diff --git a/tests/sdk/agent/test_agent_init_state_invariants.py b/tests/sdk/agent/test_agent_init_state_invariants.py index 7759fbbc1b..8c43bc21a3 100644 --- a/tests/sdk/agent/test_agent_init_state_invariants.py +++ b/tests/sdk/agent/test_agent_init_state_invariants.py @@ -49,7 +49,7 @@ def on_event(e): def test_agent_init_state_skips_when_system_prompt_already_present(tmp_path) -> None: agent = _make_agent() state = _make_state(agent, tmp_path) - state.events.append( + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="x"), @@ -73,8 +73,8 @@ def test_agent_init_state_skips_when_system_prompt_is_second_event_remote_prefix ) -> None: agent = _make_agent() state = _make_state(agent, tmp_path) - state.events.append(ConversationStateUpdateEvent(key="stats", value={})) - state.events.append( + state.append_event(ConversationStateUpdateEvent(key="stats", value={})) + state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="x"), @@ -100,7 +100,7 @@ def test_agent_init_state_raises_if_user_message_before_system_prompt_in_prefix( state = _make_state(agent, tmp_path) from openhands.sdk.llm import Message - state.events.append( + state.append_event( MessageEvent( source="user", llm_message=Message(role="user", content=[TextContent(text="hi")]), diff --git a/tests/sdk/agent/test_agent_utils.py b/tests/sdk/agent/test_agent_utils.py index bec339bdb5..1a146656ad 100644 --- a/tests/sdk/agent/test_agent_utils.py +++ b/tests/sdk/agent/test_agent_utils.py @@ -126,39 +126,28 @@ def sample_tools(): # --------------------------------------------------------------------------- -@patch("openhands.sdk.agent.utils.View.from_events") @patch("openhands.sdk.event.base.LLMConvertibleEvent.events_to_messages") def test_prepare_llm_messages_without_condenser( - mock_events_to_messages, mock_from_events, sample_events, sample_messages + mock_events_to_messages, sample_events, sample_messages ): """Test prepare_llm_messages without condenser.""" - # Setup mocks - mock_view = Mock(spec=View) - mock_view.events = sample_events - mock_from_events.return_value = mock_view mock_events_to_messages.return_value = sample_messages + view = View(events=sample_events) - # Call function - result = prepare_llm_messages(sample_events) + result = prepare_llm_messages(view) - # Verify results assert result == sample_messages - mock_from_events.assert_called_once_with(sample_events) - mock_events_to_messages.assert_called_once_with(sample_events) + mock_events_to_messages.assert_called_once_with(view.events) -@patch("openhands.sdk.agent.utils.View.from_events") @patch("openhands.sdk.event.base.LLMConvertibleEvent.events_to_messages") def test_prepare_llm_messages_with_additional_messages( - mock_events_to_messages, mock_from_events, sample_events, sample_messages + mock_events_to_messages, sample_events, sample_messages ): """Test prepare_llm_messages with additional messages.""" - # Setup mocks - mock_view = Mock(spec=View) - mock_view.events = sample_events - mock_from_events.return_value = mock_view - # Create a copy to avoid mutation issues + # Copy to avoid mutation issues with the extend() inside prepare_llm_messages. mock_events_to_messages.return_value = sample_messages.copy() + view = View(events=sample_events) additional_messages = [ Message( @@ -167,60 +156,40 @@ def test_prepare_llm_messages_with_additional_messages( ) ] - # Call function - result = prepare_llm_messages( - sample_events, additional_messages=additional_messages - ) + result = prepare_llm_messages(view, additional_messages=additional_messages) - # Verify results - expected_messages = sample_messages + additional_messages - assert result == expected_messages - mock_from_events.assert_called_once_with(sample_events) - mock_events_to_messages.assert_called_once_with(sample_events) + assert result == sample_messages + additional_messages + mock_events_to_messages.assert_called_once_with(view.events) -@patch("openhands.sdk.agent.utils.View.from_events") @patch("openhands.sdk.event.base.LLMConvertibleEvent.events_to_messages") def test_prepare_llm_messages_with_condenser_returns_view( mock_events_to_messages, - mock_from_events, sample_events, sample_messages, mock_condenser, ): """Test prepare_llm_messages with condenser that returns a View.""" - # Setup mocks - mock_view = Mock(spec=View) - mock_view.events = sample_events - mock_from_events.return_value = mock_view - - condensed_events = sample_events[:2] # Simulate condensation reducing events - condensed_view = Mock(spec=View) - condensed_view.events = condensed_events + view = View(events=sample_events) + + condensed_view = View(events=sample_events[:2]) mock_condenser.condense.return_value = condensed_view condensed_messages = sample_messages[:2] mock_events_to_messages.return_value = condensed_messages - # Call function - result = prepare_llm_messages(sample_events, condenser=mock_condenser) + result = prepare_llm_messages(view, condenser=mock_condenser) - # Verify results assert result == condensed_messages - mock_from_events.assert_called_once_with(sample_events) - mock_condenser.condense.assert_called_once_with(mock_view, agent_llm=None) - mock_events_to_messages.assert_called_once_with(condensed_events) + mock_condenser.condense.assert_called_once_with(view, agent_llm=None) + mock_events_to_messages.assert_called_once_with(condensed_view.events) -@patch("openhands.sdk.agent.utils.View.from_events") def test_prepare_llm_messages_with_condenser_returns_condensation( - mock_from_events, sample_events, mock_condenser + sample_events, mock_condenser ): """Test prepare_llm_messages with condenser that returns a Condensation.""" - # Setup mocks - mock_view = Mock(spec=View) - mock_view.events = sample_events - mock_from_events.return_value = mock_view + view = View(events=sample_events) condensation = Condensation( summary="Test condensation summary", @@ -228,34 +197,61 @@ def test_prepare_llm_messages_with_condenser_returns_condensation( ) mock_condenser.condense.return_value = condensation - # Call function - result = prepare_llm_messages(sample_events, condenser=mock_condenser) + result = prepare_llm_messages(view, condenser=mock_condenser) - # Verify results assert result == condensation - mock_from_events.assert_called_once_with(sample_events) - mock_condenser.condense.assert_called_once_with(mock_view, agent_llm=None) + mock_condenser.condense.assert_called_once_with(view, agent_llm=None) -@patch("openhands.sdk.agent.utils.View.from_events") @patch("openhands.sdk.event.base.LLMConvertibleEvent.events_to_messages") -def test_prepare_llm_messages_empty_events(mock_events_to_messages, mock_from_events): - """Test prepare_llm_messages with empty events list.""" - # Setup mocks - mock_view = Mock(spec=View) - mock_view.events = [] - mock_from_events.return_value = mock_view +def test_prepare_llm_messages_empty_view(mock_events_to_messages): + """Test prepare_llm_messages with an empty view.""" mock_events_to_messages.return_value = [] + view = View() - # Call function - result = prepare_llm_messages([]) + result = prepare_llm_messages(view) - # Verify results assert result == [] - mock_from_events.assert_called_once_with([]) mock_events_to_messages.assert_called_once_with([]) +def test_prepare_llm_messages_does_not_rebuild_view(monkeypatch, sample_events) -> None: + """The hot path must consume the passed view directly without rebuilding it. + + This is the core perf invariant from #3053: the cached view on + `ConversationState` is what eliminates the per-step `O(n)` + `enforce_properties` cost, so `prepare_llm_messages` must not call + `View.from_events` (which would run that enforcement again). + """ + from_events_calls = 0 + enforce_calls = 0 + original_from_events = View.from_events + original_enforce = View.enforce_properties + + def counting_from_events(events): + nonlocal from_events_calls + from_events_calls += 1 + return original_from_events(events) + + def counting_enforce(self, all_events): + nonlocal enforce_calls + enforce_calls += 1 + return original_enforce(self, all_events) + + monkeypatch.setattr(View, "from_events", staticmethod(counting_from_events)) + monkeypatch.setattr(View, "enforce_properties", counting_enforce) + + view = View(events=sample_events) + prepare_llm_messages(view) + + assert from_events_calls == 0, ( + "prepare_llm_messages must not call View.from_events on the hot path" + ) + assert enforce_calls == 0, ( + "prepare_llm_messages must not call enforce_properties on the hot path" + ) + + # --------------------------------------------------------------------------- # Tests for make_llm_completion # --------------------------------------------------------------------------- @@ -420,17 +416,13 @@ def test_make_llm_completion_empty_messages(mock_llm): # --------------------------------------------------------------------------- -@patch("openhands.sdk.agent.utils.View.from_events") @patch("openhands.sdk.event.base.LLMConvertibleEvent.events_to_messages") def test_prepare_llm_messages_and_make_llm_completion_integration( - mock_events_to_messages, mock_from_events, sample_events, sample_messages, mock_llm + mock_events_to_messages, sample_events, sample_messages, mock_llm ): """Test integration between prepare_llm_messages and make_llm_completion.""" - # Setup mocks for prepare_llm_messages - mock_view = Mock(spec=View) - mock_view.events = sample_events - mock_from_events.return_value = mock_view mock_events_to_messages.return_value = sample_messages + view = View(events=sample_events) # Setup mocks for make_llm_completion mock_llm.uses_responses_api.return_value = False @@ -438,7 +430,7 @@ def test_prepare_llm_messages_and_make_llm_completion_integration( mock_llm.completion.return_value = mock_response # Call functions in sequence (simulating real usage) - messages = prepare_llm_messages(sample_events) + messages = prepare_llm_messages(view) result = make_llm_completion(mock_llm, messages) # Verify results diff --git a/tests/sdk/conversation/local/test_conversation_core.py b/tests/sdk/conversation/local/test_conversation_core.py index af517d5830..6a880c3197 100644 --- a/tests/sdk/conversation/local/test_conversation_core.py +++ b/tests/sdk/conversation/local/test_conversation_core.py @@ -60,7 +60,7 @@ def test_conversation_event_log_functionality(): ] for event in events: - conv.state.events.append(event) + conv.state.append_event(event) # Test basic EventLog functionality total_events = len(conv.state.events) @@ -87,7 +87,7 @@ def test_conversation_state_persistence(): # Add an event event = create_test_event("persist-test", "Persistence test") - conv.state.events.append(event) + conv.state.append_event(event) # State should auto-save when events are added # Check that files were created @@ -136,14 +136,14 @@ def test_conversation_event_id_validation(): # Add first event event1 = create_test_event("unique-id-1", "First event") - conv.state.events.append(event1) + conv.state.append_event(event1) # Add event with duplicate ID - should raise ValueError event2 = create_test_event("unique-id-1", "Second event") with pytest.raises( ValueError, match=r"Event with ID 'unique-id-1' already exists at index \d+" ): - conv.state.events.append(event2) + conv.state.append_event(event2) # Only the first event should be in the log our_events = [e for e in conv.state.events if e.id == "unique-id-1"] @@ -169,7 +169,7 @@ def test_conversation_large_event_handling(): num_events = 5000 # Large number to test memory usage for i in range(num_events): event = create_test_event(f"bulk-event-{i:04d}", f"Message {i}") - conv.state.events.append(event) + conv.state.append_event(event) # Check memory usage periodically if i % 1000 == 0 and i > 0: @@ -241,7 +241,7 @@ def test_conversation_memory_vs_local_filestore(): conv = Conversation(agent=agent, persistence_dir=temp_dir, workspace=temp_dir) event = create_test_event("local-test", "Local test") - conv.state.events.append(event) + conv.state.append_event(event) # State auto-saves when events are added # Verify files were created diff --git a/tests/sdk/conversation/local/test_fork.py b/tests/sdk/conversation/local/test_fork.py index cb1f448ece..d73273b74c 100644 --- a/tests/sdk/conversation/local/test_fork.py +++ b/tests/sdk/conversation/local/test_fork.py @@ -53,8 +53,8 @@ def test_fork_copies_events(): """Events from the source must appear in the fork.""" with tempfile.TemporaryDirectory() as tmpdir: src = Conversation(agent=_agent(), persistence_dir=tmpdir, workspace=tmpdir) - src.state.events.append(_msg("evt-1", "hello")) - src.state.events.append(_msg("evt-2", "world")) + src.state.append_event(_msg("evt-1", "hello")) + src.state.append_event(_msg("evt-2", "world")) fork = src.fork() @@ -68,11 +68,11 @@ def test_fork_source_unmodified(): """Appending to the fork must not affect the source.""" with tempfile.TemporaryDirectory() as tmpdir: src = Conversation(agent=_agent(), persistence_dir=tmpdir, workspace=tmpdir) - src.state.events.append(_msg("src-evt")) + src.state.append_event(_msg("src-evt")) src_event_count = len(src.state.events) fork = src.fork() - fork.state.events.append(_msg("fork-only")) + fork.state.append_event(_msg("fork-only")) # Source should not grow assert len(src.state.events) == src_event_count @@ -179,7 +179,7 @@ def test_fork_event_deep_copy_isolation(): """Mutating an event object in the fork must not affect the source.""" with tempfile.TemporaryDirectory() as tmpdir: src = Conversation(agent=_agent(), persistence_dir=tmpdir, workspace=tmpdir) - src.state.events.append(_msg("deep-evt", "original")) + src.state.append_event(_msg("deep-evt", "original")) fork = src.fork() @@ -229,8 +229,8 @@ def test_fork_persisted_events_survive_reload(): with tempfile.TemporaryDirectory() as tmpdir: src = Conversation(agent=_agent(), persistence_dir=tmpdir, workspace=tmpdir) - src.state.events.append(_msg(evt_id_1, "hello")) - src.state.events.append(_msg(evt_id_2, "world")) + src.state.append_event(_msg(evt_id_1, "hello")) + src.state.append_event(_msg(evt_id_2, "world")) fork = src.fork() fork_id = fork.id diff --git a/tests/sdk/conversation/local/test_rerun_actions.py b/tests/sdk/conversation/local/test_rerun_actions.py index 2304773718..edb9bcfcdf 100644 --- a/tests/sdk/conversation/local/test_rerun_actions.py +++ b/tests/sdk/conversation/local/test_rerun_actions.py @@ -179,10 +179,10 @@ def test_rerun_actions_basic(): # Manually add action events to simulate a conversation history conversation._ensure_agent_ready() action_event = _make_action_event("rerun_test", action1, "tc1") - conversation._state.events.append(action_event) + conversation._state.append_event(action_event) action_event2 = _make_action_event("rerun_test", action2, "tc2") - conversation._state.events.append(action_event2) + conversation._state.append_event(action_event2) # Now rerun all actions result = conversation.rerun_actions() @@ -202,7 +202,7 @@ def test_rerun_actions_preserves_original_observations(): conversation._ensure_agent_ready() action = RerunTestAction(value="preserve_test") action_event = _make_action_event("rerun_test", action, "tc1") - conversation._state.events.append(action_event) + conversation._state.append_event(action_event) # Count events before rerun events_before = len(list(conversation._state.events)) @@ -236,12 +236,12 @@ def test_rerun_actions_skips_none_actions(): llm_response_id="resp1", action=None, # Failed validation ) - conversation._state.events.append(action_event_none) + conversation._state.append_event(action_event_none) # Add a valid action event action = RerunTestAction(value="valid") action_event_valid = _make_action_event("rerun_test", action, "tc2") - conversation._state.events.append(action_event_valid) + conversation._state.append_event(action_event_valid) # Rerun should only execute the valid action and succeed result = conversation.rerun_actions() @@ -260,7 +260,7 @@ def test_rerun_actions_missing_tool_raises(): # Add an action event for a tool that doesn't exist action = RerunTestAction(value="test") action_event = _make_action_event("rerun_test", action, "tc1") - conversation._state.events.append(action_event) + conversation._state.append_event(action_event) with pytest.raises(KeyError) as exc_info: conversation.rerun_actions() @@ -277,7 +277,7 @@ def test_rerun_can_be_called_manually(): conversation._ensure_agent_ready() action = RerunTestAction(value="manual") action_event = _make_action_event("rerun_test", action, "tc1") - conversation._state.events.append(action_event) + conversation._state.append_event(action_event) # Call rerun manually (not during init) result = conversation.rerun_actions() @@ -455,7 +455,7 @@ def test_rerun_reproduces_file_state(tmp_path: Path, monkeypatch: pytest.MonkeyP test_file = tmp_path / "test_file.txt" action = FileWriteAction(filepath=str(test_file), content="hello world") action_event = _make_action_event("file_write", action, "tc1") - conversation._state.events.append(action_event) + conversation._state.append_event(action_event) # First rerun creates the file result = conversation.rerun_actions() @@ -495,7 +495,7 @@ def test_rerun_non_idempotent_with_log(tmp_path: Path, monkeypatch: pytest.Monke test_file = tmp_path / "new_file.txt" action = FileCreateAction(filepath=str(test_file), content="content") action_event = _make_action_event("file_create", action, "tc1") - conversation._state.events.append(action_event) + conversation._state.append_event(action_event) log_dir = tmp_path / "rerun_log" @@ -555,16 +555,16 @@ def test_rerun_early_exit_on_failure(tmp_path: Path, monkeypatch: pytest.MonkeyP # Add a successful action test_file1 = tmp_path / "file1.txt" action1 = FileWriteAction(filepath=str(test_file1), content="first") - conversation._state.events.append(_make_action_event("file_write", action1, "tc1")) + conversation._state.append_event(_make_action_event("file_write", action1, "tc1")) # Add a failing action (raises exception) action2 = FailingAction(message="intentional") - conversation._state.events.append(_make_action_event("failing", action2, "tc2")) + conversation._state.append_event(_make_action_event("failing", action2, "tc2")) # Add another successful action (should NOT be executed due to early exit) test_file2 = tmp_path / "file2.txt" action3 = FileWriteAction(filepath=str(test_file2), content="second") - conversation._state.events.append(_make_action_event("file_write", action3, "tc3")) + conversation._state.append_event(_make_action_event("file_write", action3, "tc3")) log_dir = tmp_path / "rerun_log" @@ -611,7 +611,7 @@ def test_rerun_multiple_files(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): filepath=str(tmp_path / filename), content=content, ) - conversation._state.events.append( + conversation._state.append_event( _make_action_event("file_write", action, f"tc{i}") ) diff --git a/tests/sdk/conversation/local/test_state_serialization.py b/tests/sdk/conversation/local/test_state_serialization.py index 65377cb9c1..6515625c40 100644 --- a/tests/sdk/conversation/local/test_state_serialization.py +++ b/tests/sdk/conversation/local/test_state_serialization.py @@ -74,8 +74,8 @@ def test_conversation_state_basic_serialization(): source="user", llm_message=Message(role="user", content=[TextContent(text="hello")]), ) - state.events.append(event1) - state.events.append(event2) + state.append_event(event1) + state.append_event(event2) # Test serialization - note that events are not included in base state serialized = state.model_dump_json(exclude_none=True) @@ -132,8 +132,8 @@ def test_conversation_state_persistence_save_load(): source="user", llm_message=Message(role="user", content=[TextContent(text="hello")]), ) - state.events.append(event1) - state.events.append(event2) + state.append_event(event1) + state.append_event(event2) # Note: Do NOT register LLM stats here - this test verifies pure event # persistence. LLM stats registration happens during agent initialization # which is now lazy. @@ -195,7 +195,7 @@ def test_conversation_state_incremental_save(): event1 = SystemPromptEvent( source="agent", system_prompt=TextContent(text="system"), tools=[] ) - state.events.append(event1) + state.append_event(event1) # Note: Do NOT register LLM stats here - LLM registration happens during # agent initialization which is now lazy. @@ -208,7 +208,7 @@ def test_conversation_state_incremental_save(): source="user", llm_message=Message(role="user", content=[TextContent(text="hello")]), ) - state.events.append(event2) + state.append_event(event2) # Verify additional event file was created event_files = list(Path(persist_path_for_state, "events").glob("*.json")) @@ -428,7 +428,7 @@ def test_conversation_state_exclude_from_base_state(): event = SystemPromptEvent( source="agent", system_prompt=TextContent(text="system"), tools=[] ) - state.events.append(event) + state.append_event(event) # State auto-saves, read base state file directly base_state_path = Path(temp_dir) / "base_state.json" diff --git a/tests/sdk/conversation/test_ask_agent.py b/tests/sdk/conversation/test_ask_agent.py index 90ea21ef5d..6ef01180c8 100644 --- a/tests/sdk/conversation/test_ask_agent.py +++ b/tests/sdk/conversation/test_ask_agent.py @@ -257,7 +257,7 @@ def test_ask_agent_with_existing_events_and_tool_calls( # 0. SystemPromptEvent (required for proper conversation state) # In a real conversation, this is always added by init_state before user messages - conv.state.events.append( + conv.state.append_event( SystemPromptEvent( source="agent", system_prompt=TextContent(text="You are a helpful assistant."), @@ -266,7 +266,7 @@ def test_ask_agent_with_existing_events_and_tool_calls( ) # 1. Prior user message - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -283,7 +283,7 @@ def test_ask_agent_with_existing_events_and_tool_calls( arguments=json.dumps({"command": "ls -la"}), origin="completion", ) - conv.state.events.append( + conv.state.append_event( ActionEvent( source="agent", thought=[TextContent(text="I'll list the files using the terminal")], @@ -302,7 +302,7 @@ def test_ask_agent_with_existing_events_and_tool_calls( "drwxr-xr-x 3 user user 4096 Nov 25 09:59 ..\n" "-rw-r--r-- 1 user user 12 Nov 25 10:00 test.txt" ) - conv.state.events.append( + conv.state.append_event( ObservationEvent( source="environment", observation=MockObservation(result=observation_result), diff --git a/tests/sdk/conversation/test_condense.py b/tests/sdk/conversation/test_condense.py index 284e01d46d..8eb9c70b01 100644 --- a/tests/sdk/conversation/test_condense.py +++ b/tests/sdk/conversation/test_condense.py @@ -119,7 +119,7 @@ def test_local_conversation_condense_without_condenser(tmp_path, agent): ) # Add some events to create history - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -158,7 +158,7 @@ def test_local_conversation_condense_with_condenser( ) # Add some events to create history - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -202,7 +202,7 @@ def test_local_conversation_condense_copies_llm_config(tmp_path): ) # Add some events to create history - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -230,7 +230,7 @@ def test_local_conversation_condense_with_existing_events_and_tool_calls( ) # 1. Prior user message - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -247,7 +247,7 @@ def test_local_conversation_condense_with_existing_events_and_tool_calls( arguments=json.dumps({"command": "ls -la"}), origin="completion", ) - conv.state.events.append( + conv.state.append_event( ActionEvent( source="agent", thought=[TextContent(text="I'll list the files using the terminal")], @@ -266,7 +266,7 @@ def test_local_conversation_condense_with_existing_events_and_tool_calls( "drwxr-xr-x 3 user user 4096 Nov 25 09:59 ..\n" "-rw-r--r-- 1 user user 12 Nov 25 10:00 test.txt" ) - conv.state.events.append( + conv.state.append_event( ObservationEvent( source="environment", observation=CondenseTestMockObservation(result=observation_result), @@ -292,7 +292,7 @@ def test_local_conversation_condense_force_condenser_bypasses_window(tmp_path, a ) # Add minimal events (normally wouldn't trigger condensation) - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -433,7 +433,7 @@ def test_local_conversation_condense_raises_context_window_error(tmp_path, agent ) # Add some events to create history - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -459,7 +459,7 @@ def test_local_conversation_condense_handles_empty_response(tmp_path, agent): ) # Add some events to create history - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( @@ -585,7 +585,7 @@ def test_local_conversation_condense_llm_registry_isolation(tmp_path, agent): ) # Add some events to create history - conv.state.events.append( + conv.state.append_event( MessageEvent( source="user", llm_message=Message( diff --git a/tests/sdk/conversation/test_generate_title.py b/tests/sdk/conversation/test_generate_title.py index 00b65e8a59..5bb444d8e4 100644 --- a/tests/sdk/conversation/test_generate_title.py +++ b/tests/sdk/conversation/test_generate_title.py @@ -77,7 +77,7 @@ def test_generate_title_without_llm_uses_agent_llm(mock_completion): conv = Conversation(agent=agent, visualizer=None) user_message = create_user_message_event("Help me create a Python script") - conv.state.events.append(user_message) + conv.state.append_event(user_message) mock_completion.return_value = create_mock_llm_response("Create Python Script") @@ -109,7 +109,7 @@ def test_generate_title_llm_error_fallback(mock_completion): # Add a user message user_message = create_user_message_event("Fix the bug in my application") - conv.state.events.append(user_message) + conv.state.append_event(user_message) # Create an LLM to pass explicitly custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="err") @@ -133,7 +133,7 @@ def test_generate_title_truncation_respects_max_length(mock_completion): # Add a user message that is longer than max_length long_message = "Create a web application with advanced features and database" user_message = create_user_message_event(long_message) - conv.state.events.append(user_message) + conv.state.append_event(user_message) # Force LLM failure to exercise the truncation fallback path mock_completion.side_effect = Exception("LLM error") @@ -152,7 +152,7 @@ def test_generate_title_with_llm_truncates_long_response(mock_completion): # Add a user message user_message = create_user_message_event("Create a web application") - conv.state.events.append(user_message) + conv.state.append_event(user_message) # Create an LLM to pass explicitly custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="test") @@ -179,7 +179,7 @@ def test_generate_title_with_custom_llm(mock_completion): # Add a user message user_message = create_user_message_event("Debug my code") - conv.state.events.append(user_message) + conv.state.append_event(user_message) # Create a custom LLM custom_llm = LLM( @@ -205,7 +205,7 @@ def test_generate_title_empty_llm_response_fallback(mock_completion): # Add a user message user_message = create_user_message_event("Help with testing") - conv.state.events.append(user_message) + conv.state.append_event(user_message) # Create an LLM to pass explicitly custom_llm = LLM(model="gpt-4o-mini", api_key=SecretStr("key"), usage_id="empty") diff --git a/tests/sdk/conversation/test_state_view_cache.py b/tests/sdk/conversation/test_state_view_cache.py new file mode 100644 index 0000000000..61d6b51b56 --- /dev/null +++ b/tests/sdk/conversation/test_state_view_cache.py @@ -0,0 +1,166 @@ +"""Tests for the cached `ConversationState.view` and the `append_event` / +`rebuild_view` write paths added for issue #3053. + +These tests assert that the incremental view stays in sync with the event log +without paying the cost of `View.from_events` on every append, and that the +full `enforce_properties` pass is reserved for explicit `rebuild_view` calls +(cold load, fork, error recovery). +""" + +from __future__ import annotations + +import uuid + +import pytest +from pydantic import SecretStr + +from openhands.sdk import LLM, Agent +from openhands.sdk.context.view import View +from openhands.sdk.conversation.event_store import EventLog +from openhands.sdk.conversation.state import ConversationState +from openhands.sdk.event.condenser import Condensation +from openhands.sdk.io import InMemoryFileStore +from openhands.sdk.workspace import LocalWorkspace +from tests.sdk.context.view.conftest import message_event + + +@pytest.fixture +def state() -> ConversationState: + """A bare ConversationState with an in-memory event log attached. + + We do not use `ConversationState.create` here because that path also + touches a LocalFileStore on disk; for these unit tests an in-memory + store is sufficient. + """ + llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test-key"), usage_id="test-llm") + agent = Agent(llm=llm) + workspace = LocalWorkspace(working_dir="/tmp/test") + + state = ConversationState( + id=uuid.uuid4(), + workspace=workspace, + persistence_dir=None, + agent=agent, + ) + state._fs = InMemoryFileStore() + state._events = EventLog(state._fs) + return state + + +def test_fresh_state_has_empty_view(state: ConversationState) -> None: + assert len(state.view) == 0 + assert state.view.events == [] + + +def test_append_event_updates_both_event_log_and_view( + state: ConversationState, +) -> None: + msg = message_event("hello") + state.append_event(msg) + + # EventLog rehydrates from disk on read, so we compare by id rather than + # identity for the underlying log; the view holds direct references and + # can be compared with `is`. + assert len(state.events) == 1 + assert state.events[0].id == msg.id + assert len(state.view) == 1 + assert state.view.events[0] is msg + + +def test_view_stays_in_parity_with_from_events_after_many_appends( + state: ConversationState, +) -> None: + msgs = [message_event(f"msg {i}") for i in range(5)] + for msg in msgs: + state.append_event(msg) + + rebuilt = View.from_events(state.events) + assert [e.id for e in state.view.events] == [e.id for e in rebuilt.events] + assert ( + state.view.unhandled_condensation_request + == rebuilt.unhandled_condensation_request + ) + + +def test_condensation_event_is_applied_incrementally( + state: ConversationState, +) -> None: + msgs = [message_event(f"msg {i}") for i in range(3)] + for msg in msgs: + state.append_event(msg) + + condensation = Condensation( + forgotten_event_ids={msgs[0].id, msgs[2].id}, + llm_response_id="resp_1", + ) + state.append_event(condensation) + + # The Condensation is still in the underlying log (it is not LLM-convertible + # but is part of the persisted history); the view, however, should reflect + # the condensation by dropping the forgotten messages. + assert len(state.view) == 1 + assert state.view.events[0] is msgs[1] + + +def test_append_event_does_not_run_enforce_on_hot_path( + state: ConversationState, monkeypatch: pytest.MonkeyPatch +) -> None: + """The hot path must not pay the cost of `enforce_properties`. + + This is the core perf invariant from #3053: incremental updates rely on + manipulation indices keeping the view well-formed, so enforcement should + only run on `rebuild_view`. + """ + call_count = 0 + original_enforce = View.enforce_properties + + def counting_enforce(self: View, all_events): # type: ignore[no-untyped-def] + nonlocal call_count + call_count += 1 + return original_enforce(self, all_events) + + monkeypatch.setattr(View, "enforce_properties", counting_enforce) + + for i in range(10): + state.append_event(message_event(f"msg {i}")) + + assert call_count == 0, ( + "append_event must not invoke enforce_properties on the hot path" + ) + + +def test_rebuild_view_runs_enforce( + state: ConversationState, monkeypatch: pytest.MonkeyPatch +) -> None: + """rebuild_view is the one place where full property enforcement runs.""" + call_count = 0 + original_enforce = View.enforce_properties + + def counting_enforce(self: View, all_events): # type: ignore[no-untyped-def] + nonlocal call_count + call_count += 1 + return original_enforce(self, all_events) + + monkeypatch.setattr(View, "enforce_properties", counting_enforce) + + for i in range(3): + state.append_event(message_event(f"msg {i}")) + + # Sanity: no enforce so far. + assert call_count == 0 + + state.rebuild_view() + assert call_count >= 1 + # Parity check after the rebuild. + assert [e.id for e in state.view.events] == [e.id for e in state.events] + + +def test_rebuild_view_replaces_cached_instance(state: ConversationState) -> None: + """rebuild_view should produce a fresh View instance derived from the log.""" + state.append_event(message_event("hello")) + before = state.view + state.rebuild_view() + after = state.view + + assert before is not after + assert [e.id for e in after.events] == [e.id for e in before.events] diff --git a/tests/sdk/hooks/test_integration.py b/tests/sdk/hooks/test_integration.py index 2e9bdc42f8..c7357f3ceb 100644 --- a/tests/sdk/hooks/test_integration.py +++ b/tests/sdk/hooks/test_integration.py @@ -416,7 +416,7 @@ def test_post_tool_use_finds_action_from_events( ) # Add action to state events (simulating what Conversation does) - mock_conversation_state.events.append(action_event) + mock_conversation_state.append_event(action_event) # Create a corresponding observation event observation_event = ObservationEvent( diff --git a/tests/sdk/llm/test_prompt_caching_cross_conversation.py b/tests/sdk/llm/test_prompt_caching_cross_conversation.py index 55a7c257cd..9de7c64011 100644 --- a/tests/sdk/llm/test_prompt_caching_cross_conversation.py +++ b/tests/sdk/llm/test_prompt_caching_cross_conversation.py @@ -107,7 +107,7 @@ def test_end_to_end_caching_flow(tmp_path, dynamic_context, expect_dynamic): def on_event(event): collected_events.append(event) - state.events.append(event) + state.append_event(event) agent.init_state(state, on_event=on_event) @@ -123,7 +123,7 @@ def on_event(event): content=[TextContent(text="Hello")], ), ) - state.events.append(user_message) + state.append_event(user_message) llm_convertible_events = [ e for e in state.events if isinstance(e, LLMConvertibleEvent) @@ -217,7 +217,7 @@ def test_cross_conversation_cache_sharing(tmp_path, first_suffix, second_suffix) def on_event(event): collected_events.append(event) - state.events.append(event) + state.append_event(event) agent.init_state(state, on_event=on_event) @@ -231,7 +231,7 @@ def on_event(event): content=[TextContent(text="Hi")], ), ) - state.events.append(user_message) + state.append_event(user_message) llm_convertible_events = [ e for e in state.events if isinstance(e, LLMConvertibleEvent)