diff --git a/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py b/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py index 6fe29edd..8e1d53b8 100644 --- a/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/test_claude_agent_sdk.py @@ -166,6 +166,8 @@ async def calculator_handler(args): assert task_span["output"] is not None assert result_message is not None, "Should have received result message" + assert getattr(result_message, "result", None) is not None + assert task_span["output"] == result_message.result if hasattr(result_message, "num_turns"): assert task_span.get("metadata", {}).get("num_turns") is not None if hasattr(result_message, "session_id"): diff --git a/py/src/braintrust/integrations/claude_agent_sdk/tracing.py b/py/src/braintrust/integrations/claude_agent_sdk/tracing.py index 4dea63d6..398d66f8 100644 --- a/py/src/braintrust/integrations/claude_agent_sdk/tracing.py +++ b/py/src/braintrust/integrations/claude_agent_sdk/tracing.py @@ -585,6 +585,7 @@ def __init__( self._task_order: list[str | None] = [] self._final_results: list[dict[str, Any]] = [] + self._result_output: Any | None = None self._task_events: list[dict[str, Any]] = [] _thread_local.tool_span_tracker = self._tool_tracker @@ -609,7 +610,11 @@ def add(self, message: Any) -> None: self._handle_system(message) def log_output(self) -> None: - """Log the last accumulated assistant message as the root span output.""" + """Log the canonical root span output for the request.""" + if self._result_output is not None: + self._root_span.log(output=self._result_output) + return + if self._final_results: self._root_span.log(output=self._final_results[-1]) @@ -715,6 +720,11 @@ def _handle_result(self, message: Any) -> None: ctx = self._get_context(None) if ctx.llm_span and (usage_metrics or usage_metadata): ctx.llm_span.log(metrics=usage_metrics or None, metadata=usage_metadata or None) + + result_value = getattr(message, "result", None) + if result_value is not None: + self._result_output = result_value + result_metadata = { k: v for k, v in {