From e61d143ebc51a3a2555942e727758e6131600f36 Mon Sep 17 00:00:00 2001 From: adagradschool Date: Tue, 17 Mar 2026 16:50:36 -0400 Subject: [PATCH] Emit full-session Claude prompt history --- plugins/trace/hooks/hooks.json | 2 +- plugins/trace/hooks/lib.sh | 15 +--- plugins/trace/hooks/parse_stop_transcript.py | 77 +++++++++-------- plugins/trace/hooks/session_end.sh | 20 ----- plugins/trace/hooks/session_start.sh | 9 -- plugins/trace/hooks/stop_hook.sh | 55 ------------ .../testdata/stop_input_full_history.json | 1 + .../stop_transcript_full_history.jsonl | 6 ++ scripts/assert_otlp_full_history.py | 84 +++++++++++++++++++ scripts/assert_parsed_full_history.py | 76 +++++++++++++++++ scripts/e2e_smoke.py | 19 ++--- scripts/replay-fixtures.sh | 27 ++++++ 12 files changed, 249 insertions(+), 142 deletions(-) create mode 100644 plugins/trace/testdata/stop_input_full_history.json create mode 100644 plugins/trace/testdata/stop_transcript_full_history.jsonl create mode 100644 scripts/assert_otlp_full_history.py create mode 100644 scripts/assert_parsed_full_history.py diff --git a/plugins/trace/hooks/hooks.json b/plugins/trace/hooks/hooks.json index 38670d9..059db6c 100644 --- a/plugins/trace/hooks/hooks.json +++ b/plugins/trace/hooks/hooks.json @@ -40,7 +40,7 @@ { "type": "command", "command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/stop_hook.sh", - "async": true + "async": false } ] } diff --git a/plugins/trace/hooks/lib.sh b/plugins/trace/hooks/lib.sh index d936140..d4678d7 100755 --- a/plugins/trace/hooks/lib.sh +++ b/plugins/trace/hooks/lib.sh @@ -244,13 +244,12 @@ ensure_session_initialized() { local requested_start_ns="${2:-}" [[ -z "$sid" ]] && return 1 - local trace_id session_span_id session_parent_span_id session_start_ns init_source root_emitted pending_tool_calls + local trace_id session_span_id session_parent_span_id session_start_ns init_source pending_tool_calls trace_id="$(get_session_state "$sid" trace_id)" session_span_id="$(get_session_state "$sid" session_span_id)" session_parent_span_id="$(get_session_state "$sid" session_parent_span_id)" session_start_ns="$(get_session_state "$sid" session_start_ns)" init_source="$(get_session_state "$sid" session_init_source)" - root_emitted="$(get_session_state "$sid" session_root_emitted)" pending_tool_calls="$(get_session_state "$sid" pending_tool_calls)" # Normal path: SessionStart already created state. @@ -262,9 +261,6 @@ ensure_session_initialized() { if [[ -z "$init_source" ]]; then set_session_state "$sid" session_init_source "unknown" fi - if [[ -z "$root_emitted" ]]; then - set_session_state "$sid" session_root_emitted "false" - fi if [[ -z "$pending_tool_calls" ]]; then set_session_state "$sid" pending_tool_calls "[]" fi @@ -280,12 +276,6 @@ ensure_session_initialized() { if [[ -z "$(get_session_state "$sid" trace_context_source)" ]]; then set_session_state "$sid" trace_context_source "generated" fi - if [[ -z "$(get_session_state "$sid" session_end_requested)" ]]; then - set_session_state "$sid" session_end_requested "false" - fi - if [[ -z "$(get_session_state "$sid" stop_in_flight)" ]]; then - set_session_state "$sid" stop_in_flight "false" - fi return 0 fi @@ -306,9 +296,6 @@ ensure_session_initialized() { set_session_state "$sid" session_traceparent_version "${PL_INITIAL_TRACEPARENT_VERSION:-}" set_session_state "$sid" session_trace_flags "${PL_INITIAL_TRACE_FLAGS:-}" set_session_state "$sid" trace_context_source "${PL_INITIAL_TRACE_CONTEXT_SOURCE:-generated}" - set_session_state "$sid" session_root_emitted "false" - set_session_state "$sid" session_end_requested "false" - set_session_state "$sid" stop_in_flight "false" log "INFO" "Session initialized lazily session_id=$sid trace_id=$trace_id" } diff --git a/plugins/trace/hooks/parse_stop_transcript.py b/plugins/trace/hooks/parse_stop_transcript.py index 48d51e6..74a4b0d 100755 --- a/plugins/trace/hooks/parse_stop_transcript.py +++ b/plugins/trace/hooks/parse_stop_transcript.py @@ -78,6 +78,17 @@ def flatten_indexed(prefix, items, out): out[attr_key] = value +def append_history_item(history, item): + if ( + item.get("role") == "user" + and history + and history[-1].get("role") == "user" + and history[-1].get("content") == item.get("content") + ): + return + history.append(item) + + def is_tool_result_user(rec): if rec.get("type") != "user": return False @@ -121,9 +132,7 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp turn_start_idx = i break - turn_records = records[turn_start_idx:] history = [] - llm_input_cursor = 0 tools = [] llms = [] pending_tool_uses = [] @@ -134,9 +143,10 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp turn_end_ns = turn_start_fallback last_input_ns = turn_start_fallback - for rec in turn_records: + for idx, rec in enumerate(records): + emit_for_turn = idx >= turn_start_idx timestamp_ns = parse_iso_to_ns(rec.get("timestamp")) - if timestamp_ns is not None: + if emit_for_turn and timestamp_ns is not None: if turn_start_ns is None or timestamp_ns < turn_start_ns: turn_start_ns = timestamp_ns if turn_end_ns is None or timestamp_ns > turn_end_ns: @@ -148,7 +158,7 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp if operation == "enqueue": content = content_to_text(rec.get("content")) if content: - history.append({"role": "user", "content": content}) + append_history_item(history, {"role": "user", "content": content}) last_input_ns = timestamp_ns or last_input_ns saw_human_input = True continue @@ -171,7 +181,7 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp tool_use = pending_tool_uses.pop(match_idx) if match_idx is not None else {} payload = {} - if pending_payload_idx < len(pending_payloads): + if emit_for_turn and pending_payload_idx < len(pending_payloads): maybe_payload = pending_payloads[pending_payload_idx] pending_payload_idx += 1 if isinstance(maybe_payload, dict): @@ -191,21 +201,22 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp if tool_end_ns is None: tool_end_ns = tool_start_ns - tools.append( - { - "name": f"Tool: {tool_name}", - "start_ns": int(tool_start_ns), - "end_ns": int(tool_end_ns), - "attributes": { - "source": "claude-code", - "hook": "PostToolUse", - "node_type": "CODE_EXECUTION", - "tool_name": tool_name, - "function_input": function_input, - "function_output": function_output, - }, - } - ) + if emit_for_turn: + tools.append( + { + "name": f"Tool: {tool_name}", + "start_ns": int(tool_start_ns), + "end_ns": int(tool_end_ns), + "attributes": { + "source": "claude-code", + "hook": "PostToolUse", + "node_type": "CODE_EXECUTION", + "tool_name": tool_name, + "function_input": function_input, + "function_output": function_output, + }, + } + ) history.append( { @@ -218,7 +229,7 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp continue user_text = content_to_text(content) - history.append({"role": "user", "content": user_text}) + append_history_item(history, {"role": "user", "content": user_text}) last_input_ns = timestamp_ns or last_input_ns saw_human_input = True continue @@ -284,6 +295,7 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp "source": "claude-code", "hook": "Stop", "node_type": "PROMPT_TEMPLATE", + "promptlayer.prompt_history_mode": "full_session", "gen_ai.operation.name": "chat", "gen_ai.provider.name": provider, "gen_ai.request.model": model, @@ -296,8 +308,7 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp if stop_reason: attrs["gen_ai.completion.0.finish_reason"] = stop_reason - immediate_input = history[llm_input_cursor:] - flatten_indexed("gen_ai.prompt", immediate_input, attrs) + flatten_indexed("gen_ai.prompt", history, attrs) completion_item = {"role": "assistant", "content": output_text} if tool_calls: @@ -306,20 +317,20 @@ def parse_transcript(transcript_path, turn_start_fallback, pending_payloads, exp span_name = "LLM Call (User)" if saw_human_input else "LLM call" - llms.append( - { - "name": span_name, - "start_ns": int(llm_start_ns), - "end_ns": int(llm_end_ns), - "attributes": attrs, - } - ) + if emit_for_turn: + llms.append( + { + "name": span_name, + "start_ns": int(llm_start_ns), + "end_ns": int(llm_end_ns), + "attributes": attrs, + } + ) assistant_history = {"role": "assistant", "content": output_text} if tool_calls: assistant_history["tool_calls"] = tool_calls history.append(assistant_history) - llm_input_cursor = len(history) saw_human_input = False if turn_start_ns is None: diff --git a/plugins/trace/hooks/session_end.sh b/plugins/trace/hooks/session_end.sh index 3c2dd2f..18ae1b6 100755 --- a/plugins/trace/hooks/session_end.sh +++ b/plugins/trace/hooks/session_end.sh @@ -19,17 +19,8 @@ trace_id="$(get_session_state "$session_id" trace_id)" session_span_id="$(get_session_state "$session_id" session_span_id)" session_parent_span_id="$(get_session_state "$session_id" session_parent_span_id)" session_start_ns="$(get_session_state "$session_id" session_start_ns)" -stop_in_flight="$(get_session_state "$session_id" stop_in_flight)" -current_turn_start_ns="$(get_session_state "$session_id" current_turn_start_ns)" [[ -z "$trace_id" || -z "$session_span_id" ]] && exit 0 [[ -z "$session_start_ns" ]] && session_start_ns="$(now_ns)" -[[ -z "$stop_in_flight" ]] && stop_in_flight="false" - -if [[ -n "$current_turn_start_ns" || "$stop_in_flight" == "true" ]]; then - set_session_state "$session_id" session_end_requested "true" - log "INFO" "SessionEnd deferred until Stop session_id=$session_id" - exit 0 -fi release_session_lock trap - EXIT @@ -42,16 +33,5 @@ emit_span "$trace_id" "$session_span_id" "$session_parent_span_id" "Claude Code acquire_session_lock "$session_id" || exit 0 trap 'release_session_lock' EXIT - -stop_in_flight="$(get_session_state "$session_id" stop_in_flight)" -current_turn_start_ns="$(get_session_state "$session_id" current_turn_start_ns)" -[[ -z "$stop_in_flight" ]] && stop_in_flight="false" -if [[ -n "$current_turn_start_ns" || "$stop_in_flight" == "true" ]]; then - set_session_state "$session_id" session_end_requested "true" - log "INFO" "SessionEnd deferred until Stop session_id=$session_id" - exit 0 -fi - -set_session_state "$session_id" session_root_emitted "true" rm -f "$PL_SESSION_STATE_DIR/$session_id.json" log "INFO" "SessionEnd finalized session_id=$session_id" diff --git a/plugins/trace/hooks/session_start.sh b/plugins/trace/hooks/session_start.sh index fe3d282..282fa51 100755 --- a/plugins/trace/hooks/session_start.sh +++ b/plugins/trace/hooks/session_start.sh @@ -33,12 +33,6 @@ if [[ -n "$existing_trace_id" && -n "$existing_session_span_id" ]]; then if [[ -z "$(get_session_state "$session_id" trace_context_source)" ]]; then set_session_state "$session_id" trace_context_source "generated" fi - if [[ -z "$(get_session_state "$session_id" session_end_requested)" ]]; then - set_session_state "$session_id" session_end_requested "false" - fi - if [[ -z "$(get_session_state "$session_id" stop_in_flight)" ]]; then - set_session_state "$session_id" stop_in_flight "false" - fi log "INFO" "SessionStart ignored existing state session_id=$session_id trace_id=$existing_trace_id" exit 0 fi @@ -59,8 +53,5 @@ set_session_state "$session_id" session_init_source "session_start_hook" set_session_state "$session_id" session_traceparent_version "${PL_INITIAL_TRACEPARENT_VERSION:-}" set_session_state "$session_id" session_trace_flags "${PL_INITIAL_TRACE_FLAGS:-}" set_session_state "$session_id" trace_context_source "${PL_INITIAL_TRACE_CONTEXT_SOURCE:-generated}" -set_session_state "$session_id" session_root_emitted "false" -set_session_state "$session_id" session_end_requested "false" -set_session_state "$session_id" stop_in_flight "false" log "INFO" "SessionStart captured session_id=$session_id trace_id=$trace_id" diff --git a/plugins/trace/hooks/stop_hook.sh b/plugins/trace/hooks/stop_hook.sh index fe7c0eb..407d406 100755 --- a/plugins/trace/hooks/stop_hook.sh +++ b/plugins/trace/hooks/stop_hook.sh @@ -46,26 +46,21 @@ session_span_id="$(get_session_state "$session_id" session_span_id)" session_parent_span_id="$(get_session_state "$session_id" session_parent_span_id)" turn_start_ns="$(get_session_state "$session_id" current_turn_start_ns)" pending_tool_calls="$(get_session_state "$session_id" pending_tool_calls)" -session_end_requested="$(get_session_state "$session_id" session_end_requested)" session_init_source="$(get_session_state "$session_id" session_init_source)" session_start_ns="$(get_session_state "$session_id" session_start_ns)" [[ -z "$trace_id" || -z "$session_span_id" ]] && exit 0 [[ -z "$pending_tool_calls" ]] && pending_tool_calls='[]' -[[ -z "$session_end_requested" ]] && session_end_requested="false" [[ -z "$session_start_ns" ]] && session_start_ns="$(now_ns)" [[ -z "$turn_start_ns" ]] && turn_start_ns="$(now_ns)" # Keep lock scope short: snapshot + clear turn-specific mutable state. -set_session_state "$session_id" stop_in_flight "true" set_session_state "$session_id" current_turn_start_ns "" set_session_state "$session_id" pending_tool_calls "[]" release_session_lock -emitted_root="false" - parse_transcript_with_retry() { local attempts=0 local parsed llm_count @@ -101,7 +96,6 @@ else fi session_attrs="{\"source\":\"claude-code\",\"hook\":\"$session_hook_attr\",\"node_type\":\"WORKFLOW\",\"session.lifecycle\":\"$session_lifecycle_attr\"}" add_span_to_batch "$trace_id" "$session_span_id" "$session_parent_span_id" "Claude Code session" "1" "$session_start_ns" "$turn_end_ns" "$session_attrs" || true - emitted_root="true" while IFS= read -r tool; do [[ -z "$tool" ]] && continue @@ -124,55 +118,6 @@ else done < <(echo "$parsed" | jq -c '.llms[]?') fi -# If SessionEnd arrived while Stop was running, re-emit root span with final end time. -if [[ "$session_end_requested" == "true" ]]; then - end_ns="$(now_ns)" - session_end_attrs='{"source":"claude-code","hook":"SessionEnd","node_type":"WORKFLOW","session.lifecycle":"deferred_finalize"}' - add_span_to_batch "$trace_id" "$session_span_id" "$session_parent_span_id" "Claude Code session" "1" "$session_start_ns" "$end_ns" "$session_end_attrs" || true - emitted_root="true" -fi - emit_spans_batch_file "$spans_file" || true -acquire_session_lock "$session_id" || exit 0 - -# Stop is no longer actively processing this turn. -set_session_state "$session_id" stop_in_flight "false" -if [[ "$emitted_root" == "true" ]]; then - set_session_state "$session_id" session_root_emitted "true" -fi - -latest_end_requested="$(get_session_state "$session_id" session_end_requested)" -latest_turn_start_ns="$(get_session_state "$session_id" current_turn_start_ns)" -latest_trace_id="$(get_session_state "$session_id" trace_id)" -latest_session_span_id="$(get_session_state "$session_id" session_span_id)" -latest_session_parent_span_id="$(get_session_state "$session_id" session_parent_span_id)" -latest_session_start_ns="$(get_session_state "$session_id" session_start_ns)" -[[ -z "$latest_end_requested" ]] && latest_end_requested="false" -[[ -z "$latest_session_start_ns" ]] && latest_session_start_ns="$(now_ns)" - -need_finalize_root="false" -if [[ "$latest_end_requested" == "true" && -z "$latest_turn_start_ns" ]]; then - need_finalize_root="true" -fi - -if [[ "$need_finalize_root" == "true" && -n "$latest_trace_id" && -n "$latest_session_span_id" ]]; then - release_session_lock - end_ns="$(now_ns)" - finalize_attrs='{"source":"claude-code","hook":"SessionEnd","node_type":"WORKFLOW","session.lifecycle":"deferred_finalize"}' - emit_span "$latest_trace_id" "$latest_session_span_id" "$latest_session_parent_span_id" "Claude Code session" "1" "$latest_session_start_ns" "$end_ns" "$finalize_attrs" || true - acquire_session_lock "$session_id" || exit 0 - set_session_state "$session_id" stop_in_flight "false" - set_session_state "$session_id" session_root_emitted "true" -fi - -latest_end_requested="$(get_session_state "$session_id" session_end_requested)" -latest_turn_start_ns="$(get_session_state "$session_id" current_turn_start_ns)" -[[ -z "$latest_end_requested" ]] && latest_end_requested="false" - -if [[ "$latest_end_requested" == "true" && -z "$latest_turn_start_ns" ]]; then - rm -f "$PL_SESSION_STATE_DIR/$session_id.json" - log "INFO" "SessionEnd finalized by Stop session_id=$session_id" -fi - log "INFO" "Stop finalized session_id=$session_id" diff --git a/plugins/trace/testdata/stop_input_full_history.json b/plugins/trace/testdata/stop_input_full_history.json new file mode 100644 index 0000000..2d1235b --- /dev/null +++ b/plugins/trace/testdata/stop_input_full_history.json @@ -0,0 +1 @@ +{"session_id":"example-session-id","transcript_path":"plugins/trace/testdata/stop_transcript_full_history.jsonl"} diff --git a/plugins/trace/testdata/stop_transcript_full_history.jsonl b/plugins/trace/testdata/stop_transcript_full_history.jsonl new file mode 100644 index 0000000..ae0dfa9 --- /dev/null +++ b/plugins/trace/testdata/stop_transcript_full_history.jsonl @@ -0,0 +1,6 @@ +{"sessionId":"example-session-id","type":"user","timestamp":"2026-03-16T12:00:00Z","message":{"content":[{"type":"text","text":"hello"}]}} +{"sessionId":"example-session-id","type":"assistant","timestamp":"2026-03-16T12:00:01Z","message":{"model":"claude-sonnet-4-6","id":"msg_example_1","stop_reason":"end_turn","usage":{"input_tokens":12,"output_tokens":6},"content":[{"type":"text","text":"Hi there"}]}} +{"sessionId":"example-session-id","type":"user","timestamp":"2026-03-16T12:01:00Z","message":{"content":[{"type":"text","text":"check the current status"}]}} +{"sessionId":"example-session-id","type":"assistant","timestamp":"2026-03-16T12:01:01Z","message":{"model":"claude-sonnet-4-6","id":"msg_example_2","stop_reason":"tool_use","usage":{"input_tokens":40,"output_tokens":12},"content":[{"type":"tool_use","id":"toolu_example_1","name":"DocsSearch","input":{"query":"current status"}}]}} +{"sessionId":"example-session-id","type":"user","timestamp":"2026-03-16T12:01:02Z","message":{"content":[{"type":"tool_result","tool_use_id":"toolu_example_1","content":"Current status: all systems operational.","is_error":false}]}} +{"sessionId":"example-session-id","type":"assistant","timestamp":"2026-03-16T12:01:03Z","message":{"model":"claude-sonnet-4-6","id":"msg_example_3","stop_reason":"end_turn","usage":{"input_tokens":55,"output_tokens":18},"content":[{"type":"text","text":"All systems are operational."}]}} diff --git a/scripts/assert_otlp_full_history.py b/scripts/assert_otlp_full_history.py new file mode 100644 index 0000000..b61e299 --- /dev/null +++ b/scripts/assert_otlp_full_history.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import json +import sys + + +def attribute_map(span): + attrs = {} + for item in span.get("attributes", []): + key = item.get("key") + value = item.get("value", {}) + if "stringValue" in value: + attrs[key] = value["stringValue"] + elif "boolValue" in value: + attrs[key] = value["boolValue"] + elif "intValue" in value: + attrs[key] = value["intValue"] + elif "doubleValue" in value: + attrs[key] = value["doubleValue"] + else: + attrs[key] = None + return attrs + + +def main() -> int: + queue_file = sys.argv[1] + with open(queue_file, encoding="utf-8") as f: + lines = [line.strip() for line in f if line.strip()] + + assert lines, "expected queued OTLP payload" + payload = json.loads(lines[-1]) + spans = payload["resourceSpans"][0]["scopeSpans"][0]["spans"] + + tool_call_llm = None + final_llm = None + tool_span = None + + for span in spans: + attrs = attribute_map(span) + name = span.get("name") + if name == "Tool: DocsSearch": + tool_span = span + elif name == "LLM Call (User)" and attrs.get("gen_ai.completion.0.tool_calls"): + tool_call_llm = span + elif name == "LLM call" and attrs.get("gen_ai.completion.0.content") == "All systems are operational.": + final_llm = span + + assert tool_span is not None, "expected emitted DocsSearch tool span" + assert tool_call_llm is not None, "expected emitted tool-call llm span" + assert final_llm is not None, "expected emitted final llm span" + + first_attrs = attribute_map(tool_call_llm) + final_attrs = attribute_map(final_llm) + + assert first_attrs.get("promptlayer.prompt_history_mode") == "full_session", ( + "expected full-session history marker on first emitted llm span" + ) + assert final_attrs.get("promptlayer.prompt_history_mode") == "full_session", ( + "expected full-session history marker on final emitted llm span" + ) + assert first_attrs.get("gen_ai.prompt.0.content") == "hello", "expected turn 1 user in tool-call prompt" + assert first_attrs.get("gen_ai.prompt.1.content") == "Hi there", ( + "expected turn 1 assistant in tool-call prompt" + ) + assert first_attrs.get("gen_ai.prompt.2.content") == "check the current status", ( + "expected current user in tool-call prompt" + ) + assert "DocsSearch" in (final_attrs.get("gen_ai.prompt.3.tool_calls") or ""), ( + "expected prior tool call in final prompt" + ) + assert final_attrs.get("gen_ai.prompt.4.content") == "Current status: all systems operational.", ( + "expected tool result in final prompt" + ) + assert final_attrs.get("gen_ai.completion.0.content") == "All systems are operational.", ( + "expected final completion content" + ) + return 0 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except AssertionError as exc: + raise SystemExit(f"Assertion failed: {exc}") from exc diff --git a/scripts/assert_parsed_full_history.py b/scripts/assert_parsed_full_history.py new file mode 100644 index 0000000..8616de2 --- /dev/null +++ b/scripts/assert_parsed_full_history.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 + +import json +import sys + + +def main() -> int: + parsed_file = sys.argv[1] + with open(parsed_file, encoding="utf-8") as f: + parsed = json.load(f) + + llms = parsed.get("llms", []) + tools = parsed.get("tools", []) + + assert len(llms) == 2, f"expected 2 llm spans, got {len(llms)}" + assert len(tools) == 1, f"expected 1 tool span, got {len(tools)}" + + tool_call_llm = llms[0] + final_llm = llms[1] + + assert tool_call_llm["name"] == "LLM Call (User)", "expected first llm to be user-initiated" + assert final_llm["name"] == "LLM call", "expected final llm to be tool-result continuation" + + first_attrs = tool_call_llm["attributes"] + final_attrs = final_llm["attributes"] + + assert first_attrs.get("promptlayer.prompt_history_mode") == "full_session", ( + "expected full-session history marker on first llm span" + ) + assert final_attrs.get("promptlayer.prompt_history_mode") == "full_session", ( + "expected full-session history marker on final llm span" + ) + + assert first_attrs.get("gen_ai.prompt.0.content") == "hello", "expected first prompt item to include turn 1 user" + assert first_attrs.get("gen_ai.prompt.1.content") == "Hi there", ( + "expected first prompt item to include turn 1 assistant" + ) + assert first_attrs.get("gen_ai.prompt.2.content") == "check the current status", ( + "expected first tool-call prompt to include current user message" + ) + assert first_attrs.get("gen_ai.prompt.2.role") == "user", "expected third prompt item to be current user" + assert "DocsSearch" in (first_attrs.get("gen_ai.completion.0.tool_calls") or ""), ( + "expected tool call completion on first llm span" + ) + + assert final_attrs.get("gen_ai.prompt.0.content") == "hello", "expected final prompt to retain turn 1 user" + assert final_attrs.get("gen_ai.prompt.1.content") == "Hi there", ( + "expected final prompt to retain turn 1 assistant" + ) + assert final_attrs.get("gen_ai.prompt.2.content") == "check the current status", ( + "expected final prompt to retain current user message" + ) + assert final_attrs.get("gen_ai.prompt.3.role") == "assistant", ( + "expected final prompt to include prior assistant tool-call message" + ) + assert "DocsSearch" in (final_attrs.get("gen_ai.prompt.3.tool_calls") or ""), ( + "expected final prompt to include prior assistant tool call" + ) + assert final_attrs.get("gen_ai.prompt.4.role") == "tool", ( + "expected final prompt to include tool result role" + ) + assert final_attrs.get("gen_ai.prompt.4.content") == "Current status: all systems operational.", ( + "expected final prompt to include tool result content" + ) + assert final_attrs.get("gen_ai.completion.0.content") == "All systems are operational.", ( + "expected final completion content" + ) + assert tools[0]["name"] == "Tool: DocsSearch", "expected tool span name" + return 0 + + +if __name__ == "__main__": + try: + raise SystemExit(main()) + except AssertionError as exc: + raise SystemExit(f"Assertion failed: {exc}") from exc diff --git a/scripts/e2e_smoke.py b/scripts/e2e_smoke.py index ab172ef..f31b97d 100755 --- a/scripts/e2e_smoke.py +++ b/scripts/e2e_smoke.py @@ -132,7 +132,7 @@ def validate_span_graph(spans: list[dict]) -> tuple[list[str], dict]: if not spans: return ["No spans found in captured OTLP payloads"], {} - traces: dict[str, list[dict]] = {} + traces: dict[str, dict[str, dict]] = {} for span in spans: trace_id = span.get("traceId") span_id = span.get("spanId") @@ -142,7 +142,7 @@ def validate_span_graph(spans: list[dict]) -> tuple[list[str], dict]: if not span_id: errors.append("Span missing spanId") continue - traces.setdefault(trace_id, []).append(span) + traces.setdefault(trace_id, {})[span_id] = span trace_count = len(traces) if trace_count != 1: @@ -153,13 +153,11 @@ def validate_span_graph(spans: list[dict]) -> tuple[list[str], dict]: session_root_found = False unresolved_parent_ids: set[str] = set() - for trace_id, trace_spans in traces.items(): - by_id: dict[str, dict] = {} - for span in trace_spans: - span_id = span["spanId"] - if span_id in by_id: - errors.append(f"Duplicate spanId in trace {trace_id}") - by_id[span_id] = span + unique_span_count = 0 + + for trace_id, by_id in traces.items(): + trace_spans = list(by_id.values()) + unique_span_count += len(trace_spans) trace_roots = 0 for span in trace_spans: @@ -212,7 +210,8 @@ def validate_span_graph(spans: list[dict]) -> tuple[list[str], dict]: ) metrics = { - "span_count": len(spans), + "raw_span_count": len(spans), + "span_count": unique_span_count, "trace_count": trace_count, "root_count": root_count, "edge_count": edge_count, diff --git a/scripts/replay-fixtures.sh b/scripts/replay-fixtures.sh index 6ca04ef..b84e258 100755 --- a/scripts/replay-fixtures.sh +++ b/scripts/replay-fixtures.sh @@ -119,6 +119,31 @@ test_valid_traceparent_stop_hook() { assert_session_span_payload "$home/.claude/state/promptlayer_otlp_queue.ndjson" "$TRACE_ID_VALID" "$PARENT_SPAN_ID_VALID" } +test_full_history_parser() { + local parsed_file + parsed_file="$(mktemp "${TMPDIR:-/tmp}/pl-full-history-parse.XXXXXX")" + trap 'rm -f "$parsed_file"' RETURN + + PL_PENDING_TOOL_CALLS='[{"tool_name":"DocsSearch","function_input":{"query":"current status"},"function_output":{"content":"Current status: all systems operational."}}]' \ + python3 plugins/trace/hooks/parse_stop_transcript.py \ + plugins/trace/testdata/stop_transcript_full_history.jsonl \ + 0 \ + "$SESSION_ID" >"$parsed_file" + + python3 scripts/assert_parsed_full_history.py "$parsed_file" +} + +test_full_history_stop_hook() { + local home + home="$(new_home)" + trap 'cleanup_home "$home"' RETURN + + run_hook "$home" "plugins/trace/hooks/session_start.sh" "plugins/trace/testdata/session_start_input.json" + run_hook "$home" "plugins/trace/hooks/stop_hook.sh" "plugins/trace/testdata/stop_input_full_history.json" + + python3 scripts/assert_otlp_full_history.py "$home/.claude/state/promptlayer_otlp_queue.ndjson" +} + test_missing_traceparent_fallback() { local home trace_id home="$(new_home)" @@ -185,6 +210,8 @@ test_future_version_traceparent_with_suffix() { test_valid_traceparent_session_end test_valid_traceparent_stop_hook +test_full_history_parser +test_full_history_stop_hook test_missing_traceparent_fallback test_invalid_traceparent_fallback test_non_zero_zero_version_traceparent