diff --git a/.env.defaults b/.env.defaults index bf19a9f..1c6b539 100644 --- a/.env.defaults +++ b/.env.defaults @@ -14,3 +14,13 @@ ROOTCELL_SUBNET_POOL_END=192.168.254.0 # FIREWALL_IP=192.168.109.10 # AGENT_IP=192.168.109.11 # NETWORK_PREFIX=24 + +# Browser spy is opt-in. When enabled, run `./rootcell provision` so the +# firewall service and generated /etc/agent-vm/spy.env are updated. +ROOTCELL_SPY_ENABLED=false +# ROOTCELL_SPY_RETENTION_DAYS=7 +# ROOTCELL_SPY_MAX_BYTES=6442450944 +# ROOTCELL_SPY_SPOOL_MAX_BYTES=1073741824 +# ROOTCELL_SPY_STORE_RAW=false +# ROOTCELL_SPY_BIND=127.0.0.1 +# ROOTCELL_SPY_PORT=6174 diff --git a/README.md b/README.md index 8ea1bd9..0beedc5 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ rootcell gives a coding agent disposable NixOS VMs where it can use root without touching your host filesystem. All outbound traffic passes through a separate firewall VM with DNS, HTTPS, and SSH allowlists. HTTPS is routed through a transparent decrypting proxy, so rootcell can enforce host policy and -`./rootcell spy` can show formatted Bedrock Runtime traffic when you need to see -what the agent is sending. +`./rootcell spy` can open a local browser view of captured Bedrock Runtime +traffic when you need to see what the agent is sending. rootcell is provider-backed: the same agent/firewall model can run locally on macOS with Lima or remotely in AWS EC2. @@ -222,9 +222,7 @@ state root. 
./rootcell list # list rootcell VMs and their current state ./rootcell stop --instance dev # stop the dev instance VMs ./rootcell remove --instance dev # stop dev and delete its provider VM state -./rootcell spy # tail formatted Bedrock Runtime traffic -./rootcell spy --raw # include sanitized raw JSON bodies too -./rootcell spy --tui # browse Bedrock Runtime traffic interactively +./rootcell spy # open the browser spy through a local SSH tunnel ./rootcell -i aws-dev --init-env aws-ec2 # initialize a provider-specific instance .env ./rootcell -i local --init-env macos-lima # initialize an explicit local Lima .env @@ -234,6 +232,9 @@ state root. ./rootcell --instance dev allow # reload only the dev instance allowlists ``` +Detailed browser spy operator and developer notes live in +[src/spy/README.md](src/spy/README.md). + ## Allowing Network Access Network policy is per instance. On first run, `./rootcell` copies each tracked @@ -379,8 +380,8 @@ secrets.env.defaults seed provider-qualified secret mappings for per-instanc instances/ per-instance state, allowlists, CA, SSH keys, and generated files proxy/ allowlists and mitmproxy/dnsmasq firewall code - agent_spy.py Bedrock Runtime formatter for `./rootcell spy` - agent_spy_tui.py Textual browser for `./rootcell spy --tui` + agent_spy.py Bedrock Runtime spool shim for the browser spy +src/spy/ browser spy service, Bedrock adapter, React UI, and docs pi/agent/ global pi instructions, skills, and extensions ``` @@ -524,12 +525,10 @@ the checked-in defaults. 
## Troubleshooting -See formatted Bedrock Runtime requests and responses: +Open the browser spy for captured Bedrock Runtime requests and responses: ```bash ./rootcell spy -./rootcell spy --raw -./rootcell spy --tui ``` Check that firewall services are listening: diff --git a/SPY_PLAN.md b/SPY_PLAN.md new file mode 100644 index 0000000..bee1bc8 --- /dev/null +++ b/SPY_PLAN.md @@ -0,0 +1,964 @@ +# Rootcell Browser Spy Plan + +## Goal + +Replace the current `rootcell spy --tui` / terminal spy workflow with an opt-in, +firewall-hosted browser system for inspecting LLM provider traffic. + +The system should make it easy to answer: + +- What did the harness send to the model? +- How much visible context was included with a short request? +- Which sections are system context, tools, history, current user input, + assistant output, thinking/reasoning, cache markers, and provider metadata? +- What did the provider report for input, output, cache read, and cache write + token usage? +- How did this request differ from the previous comparable request? +- What happened on the exact network request/response when deeper inspection is + needed? + +The first implementation targets Pi.dev using Amazon Bedrock. The architecture +must make future direct Anthropic, OpenAI, Claude Code, Codex, Cursor, and +multi-conversation support straightforward, but those are not v1 scope. + +## Decisions + +- `./rootcell spy` launches the browser UI. It no longer tails the terminal. +- Remove the old Textual TUI and old NDJSON spy format. There is no migration or + compatibility requirement. +- The spy system is opt-in. `ROOTCELL_SPY_ENABLED=false` is the default. +- When enabled, capture is always-on even when the browser is closed. +- Capture and the web service run on the firewall VM. +- The browser reaches the service through a rootcell-managed SSH local port + forward. No public web port is exposed. +- The browser UI is a local operator tool, not a hardened multi-user web app. 
+ No auth system, no collaboration, and no public exposure in v1. +- Only LLM-provider traffic is captured. Non-provider HTTPS traffic must never be + spooled or persisted. +- Spy does not broaden network access. Bedrock/provider endpoints still must be + allowed through the normal DNS/HTTPS allowlists for the harness to use them. +- Python remains only as a minimal mitmproxy shim. The main spy system is + TypeScript under `src/spy`. +- The Python shim provider-gates and redacts auth headers/query credentials, + then writes bounded raw provider events to a spool. +- TypeScript owns validation, provider normalization, persistence, retention, + API, SSE, and UI serving. +- SQLite is the persistent store. +- V1 stores normalized semantic content by default. Exact raw payload storage is + optional via `ROOTCELL_SPY_STORE_RAW=true`. +- Request/response bodies that pass provider gating are sensitive and may + contain secrets. Do not attempt body secret redaction beyond binary/media + summarization. +- Desktop-only UI. Do not spend v1 scope on mobile support. +- No keyboard shortcut requirement. Design the browser UX on its own terms, not + as a TUI clone. +- Token counting for highlighted text and per-block token estimates are v1.5, + not v1. +- Automated compaction detection is v1.5, not v1. +- Broader charts/visual regression screenshots are v1.5 or later. + +## Architecture + +### Firewall Components + +Provision these components on every firewall VM, even when spy is disabled: + +- Minimal Python mitmproxy shim, replacing `proxy/agent_spy.py` and + `proxy/agent_spy_tui.py`. +- TypeScript spy service under `src/spy`. +- Built static React app served by the TypeScript service. +- Persistent directories: + - SQLite store: `/var/lib/rootcell-spy/spy.sqlite` + - transient spool: `/var/spool/rootcell-spy/` + - generated config: `/etc/agent-vm/spy.env` + +When `ROOTCELL_SPY_ENABLED=false`, the service should be stopped/disabled and +the shim must not write spool files. 
Existing spy data is preserved until the +user enables spy and retention runs, or explicitly clears it. + +### Python Shim + +The shim must be tiny and traffic-safe: + +- Check generated spy config/marker before doing any capture work. +- Detect only registered LLM provider candidates, starting with Bedrock Runtime + host/path patterns. +- Redact auth headers and credential query parameters. +- Emit separate spool events for request, response metadata/body, error, and + stream/chunk observations when available. +- Do not reassemble streams in Python. If mitmproxy exposes safe chunk hooks, + write one sanitized spool event per observed chunk. Otherwise write the + provider response body and let TypeScript decode logical stream frames. +- Enforce `ROOTCELL_SPY_SPOOL_MAX_BYTES` before appending. +- Never open SQLite. +- Never perform deep provider normalization. +- Never alter allow/deny decisions or block agent traffic if capture fails. + +If the spool is full, the shim should stop writing new capture payloads and, if +there is room, write a small rate-limited dropped-capture marker. Agent traffic +must continue under the existing firewall policy. + +### TypeScript Service + +Use Bun and TypeScript. Prefer Bun built-ins and pure JS/TS dependencies. Avoid +native npm packages unless they are explicitly target-built by Nix for the +firewall architecture. + +The service responsibilities: + +- Validate spool events with Zod. +- Ingest and delete spool files after successful commit. +- Decode Bedrock payloads and AWS event-stream frames. +- Pair request/response events by flow id. +- Persist provider calls, normalized blocks, stream events, raw payloads when + enabled, health counters, and service metadata. +- Run SQLite migrations on startup. +- Enforce retention by age and size while running. +- Serve a same-origin JSON API, SSE endpoint, and built React assets. +- Expose health/status data for capture and service state. 
+ +Use Bun's native HTTP server for v1 unless routing becomes painful. Use Bun's +SQLite support if available in the pinned guest Bun; otherwise choose a +Nix-provisioned, target-native SQLite option. + +### Build And Delivery + +Static frontend assets may be built on the host because HTML, CSS, and browser +JavaScript bundles are architecture-neutral. + +The TypeScript service runtime and dependencies must be target-native on the +firewall VM: + +- Do not copy host `node_modules` into the firewall VM. +- Do not rely on macOS-built native npm artifacts. +- Avoid native npm dependencies in `src/spy` where feasible. +- If a native dependency becomes necessary, build/provision it through Nix for + the firewall target architecture. +- The firewall should not download npm packages or CDN assets at runtime. + +### Browser UI + +Use React + TypeScript + Vite, Tailwind, and local shadcn/ui-style components. +Vendor only the needed shadcn components. Do not depend on CDN assets, remote +fonts, or runtime package downloads. + +The first screen should be a live conversation-analysis surface: + +- Default load mode is "live from now"; do not auto-load historical events. +- `./rootcell spy` passes a viewer launch timestamp so the UI starts clean. +- Historical loading is explicit through time range controls such as last + 10 minutes, last hour, today, and custom range. +- Timeline rows are provider calls styled as conversation events. +- Each request/response pair is directly selectable. +- The right-side inspector is call-native: it shows exactly one provider call. +- The inspector includes request details, response details, network metadata, + headers, usage, cache markers, stream events on demand, and diff against the + previous comparable request. + +Performance requirements: + +- Virtualize the live timeline. +- Fetch summaries first and details on demand. +- Paginate historical queries. +- Keep stream events and raw payload details collapsed and loaded only on + request. 
+- Use semantic highlighting instead of editor-style highlighting as the primary + visual language. +- Avoid rendering giant JSON/code blocks into the DOM. + +Semantic highlighting should distinguish: + +- provider/request envelope +- harness/system context +- user-visible messages +- prior conversation history +- current user input +- assistant output +- thinking/reasoning +- tool definitions +- tool calls and tool results +- cache markers +- media summaries +- unknown/unclassified content + +JSON/code highlighting inside raw detail panels is secondary and should only be +used when cheap and bounded. + +## Configuration + +Seed these settings into `.env.defaults` as explicit defaults/comments: + +```sh +ROOTCELL_SPY_ENABLED=false +# ROOTCELL_SPY_RETENTION_DAYS=7 +# ROOTCELL_SPY_MAX_BYTES=6442450944 +# ROOTCELL_SPY_SPOOL_MAX_BYTES=1073741824 +# ROOTCELL_SPY_STORE_RAW=false +# ROOTCELL_SPY_BIND=127.0.0.1 +# ROOTCELL_SPY_PORT=6174 +``` + +Defaults: + +- Spy disabled unless `ROOTCELL_SPY_ENABLED=true`. +- Retain for 7 days. +- Total spy store budget: 6 GiB. +- Spool budget: 1 GiB. +- Raw exact payload storage disabled. +- Firewall service binds `127.0.0.1:6174`. + +`./rootcell spy` should choose host-local port `6174` when available and fall +back to another available local port if needed. The SSH tunnel forwards the +host-local port to `127.0.0.1:6174` on the firewall VM. + +## CLI And Provisioning + +### `rootcell provision` + +Always provision the spy service files, UI assets, directories, config template, +and systemd units. + +When enabled: + +- Render `/etc/agent-vm/spy.env`. +- Enable/start the TypeScript spy service. +- Enable shim writes through generated config/marker. +- Preserve existing spy data. + +When disabled: + +- Render config with spy disabled. +- Stop/disable the TypeScript spy service. +- Ensure the shim returns without spooling. +- Preserve existing spy data. + +Do not implement migration/remediation for existing small disks. 
There are no +existing users to support. + +### `rootcell spy` + +Required behavior: + +- If spy is disabled, print clear instructions to set + `ROOTCELL_SPY_ENABLED=true` in the selected instance `.env` and run + `./rootcell provision`. +- If service files/assets are missing or stale, tell the user to run + `./rootcell provision`; do not auto-provision. +- Ensure the firewall VM and service are reachable. +- Start an SSH local port forward through the provider abstraction. +- Print the local URL. +- Open the browser by default, with `--no-open` available. +- Stay in the foreground to keep the tunnel alive. +- Exit on Ctrl-C, closing only the tunnel. + +Remove `--tui`, `--raw`, and `--no-dedupe` from the user-facing CLI. + +Implement host-side launcher and tunnel lifecycle in TypeScript using provider +and transport abstractions. Avoid POSIX shell assumptions because Windows host +support is a future goal. + +## Data Model + +Use Zod schemas and SQLite migrations checked into `src/spy`. + +V1 durable unit: + +- `provider_call` + +Attached records: + +- request event metadata +- response event metadata +- normalized request blocks +- normalized response blocks +- stream events decoded from provider payloads +- usage records reported by the provider +- optional raw sanitized payload records +- content hashes for repeated-context and diffing +- health/drop/error counters + +Reserve future grouping concepts such as `turn_id` or conversation grouping, but +do not build turn grouping behavior in v1. + +Normalized blocks should preserve: + +- original order +- role/type +- source/provenance +- provider payload location when possible +- character and byte size +- content hash +- cache marker metadata +- media summaries instead of full binary/media bytes + +Do not compute or display per-block token estimates in v1. 
+ +## API Shape + +Endpoint names are provisional, but v1 should expose these boundaries: + +- `GET /api/health` +- `GET /api/calls?since=&provider=&model_id=&operation=&status=&cursor=&limit=` +- `GET /api/calls/:id` +- `GET /api/calls/:id/diff` +- `GET /api/calls/:id/stream-events` +- `GET /api/search?q=&since=&provider=&model_id=&operation=&status=&cursor=&limit=` +- `POST /api/clear` +- `GET /api/events` for SSE + +Use SSE for small live notifications such as new/updated call summaries and +health changes. Use normal paginated/detail endpoints for content. + +Do not enable broad CORS. V1 does not need a per-launch access token, auth +system, CSP/security-header hardening, or public web exposure. + +`POST /api/clear` should: + +- Take an ingestion lock. +- Stop ingestion briefly. +- Delete captured call data and pending spool files. +- Reset relevant capture counters. +- Store a clear baseline timestamp/generation. +- Resume ingestion. +- Keep schema/migration metadata. + +## Persistence And Retention + +SQLite is the source of truth. Spool files are transient and sensitive. + +Retention runs inside the TypeScript service only: + +- Run on startup and periodically while the service is running. +- Enforce age and size caps. +- Delete oldest `provider_call` rows first. +- Cascade delete related normalized blocks, stream events, and raw payloads. +- Enforce spool cleanup after successful ingestion. + +No separate systemd retention timer. + +If the TypeScript service is stopped, the Python shim can only fill the bounded +spool, then it must stop writing. + +## Provider And Harness Layers + +Keep two independent extension layers: + +- Provider adapters answer "what happened on the wire?" +- Harness analyzers answer "what does this mean for Pi/Codex/Claude Code/etc?" + +V1 provider: + +- Amazon Bedrock only. +- Decode Bedrock Runtime request/response shapes. +- Decode AWS event-stream frames in TypeScript. 
+- Extract provider-reported usage, status, metrics, stop reasons, text, + thinking/reasoning, tool deltas, and cache markers. + +V1 harness: + +- Pi.dev only. +- Use empirical fixtures to classify obvious Pi-added context and provenance. +- Fall back to generic Bedrock roles and `unknown` when unsure. + +Do not introduce LiteLLM or any translation proxy. The point of spy is to +observe real harness/provider behavior, not normalize traffic through another +provider abstraction. + +## UI V1 Scope + +V1 includes: + +- Desktop-only browser UI. +- Live-from-now default timeline. +- Explicit historical time range loading. +- Search and filtering by time, provider/model, event type, and normalized text. +- Provider call summaries with status, duration, model, operation, and + provider-reported usage totals. +- Request composition summary using exact structural measures: + - section presence + - message count + - character/byte size by section + - tool count and tool schema size + - cache markers + - media summaries + - provider-reported total input/output/cache usage when available +- Clear visual distinction between short current user input and large repeated + system/tool/history context. +- Repeated/new/changed cues based on block content hashes compared to the + previous comparable request. +- Cache markers clearly visible in timeline summaries and inspector details. +- Call-native right inspector. +- Diff against previous comparable request. +- On-demand stream event section. +- Network metadata and headers. +- Raw payload panels only when raw storage is enabled; otherwise show that raw + storage was disabled. +- Health/settings area showing enabled state, DB size, spool size, caps, + retention days, dropped capture count, last ingest time, and service version. +- Manual "clear spy data" action with confirmation. + +V1 excludes: + +- Automated compaction detection. +- Highlighted text token counting. +- Local token estimates. +- Exact provider token-count calls. 
+- Broad charts/dashboards. +- Mobile support. +- Keyboard shortcut requirements. +- Annotations/bookmarks/labels. +- Import/export. +- Multi-instance aggregation. +- Multi-conversation grouping. +- Public access/auth hardening. + +## Roadmap + +### Completed + +- Captured sanitized real Pi/Bedrock traffic from the existing running + `default` instance using Pi provider `amazon-bedrock` and model + `us.anthropic.claude-sonnet-4-6`. +- Added `src/spy/fixtures/bedrock-pi-us-sonnet-4-6.ndjson` with real + request/response pairs for simple streaming, two-turn history, cache markers, + toolUse, toolResult, and provider-reported usage. +- Added initial `src/spy` TypeScript contract: + - Zod spool event, provider call, normalized block, usage, stream event, raw + payload, and diff schemas. + - AWS event-stream decoder with CRC validation. + - V1 SQLite migration helper and initial schema. + - Fixture validation, event-stream decoding, and migration tests. +- Verified `bun run typecheck`, `bun run lint`, `bun run test`, direct + `bun:sqlite` migration execution, and a fixture credential audit. +- Replaced the mitmproxy-facing Python capture path with a minimal Bedrock + spool shim: + - Reads `/etc/agent-vm/spy.env` and captures only when + `ROOTCELL_SPY_ENABLED=true`. + - Provider-gates Bedrock Runtime request, response, and error events. + - Redacts auth headers and credential query parameters before spooling. + - Writes one atomic schema-shaped JSON file per event under + `/var/spool/rootcell-spy`. + - Enforces `ROOTCELL_SPY_SPOOL_MAX_BYTES` and emits rate-limited dropped + markers when the spool is full. + - Stores AWS event-stream responses as base64 with + `body_encoding=aws-eventstream` for TypeScript decoding. + - Added firewall group/tmpfiles/systemd sandbox permissions so mitmproxy can + write the sensitive spool path. 
+ - Added Python unit coverage for disabled/default behavior, config parsing, + Bedrock detection, redaction, event-stream response encoding, provider-gated + errors, spool cap behavior, dropped markers, and failure swallowing. +- Implemented the TypeScript Bedrock adapter on top of the captured fixtures: + - Added `src/spy/bedrock.ts` with `normalizeBedrockCall` and + `normalizeBedrockSpoolEvents` entrypoints. + - Normalizes paired Bedrock request/response spool events into provider + calls, semantic request/response blocks, provider-reported usage records, + decoded stream events, and opt-in raw payload records. + - Supports the captured Bedrock Converse Stream request shape, AWS + event-stream response decoding, response text reconstruction, tool use + reconstruction, usage extraction, stable IDs, stable content hashes, and + conservative Pi/Bedrock provenance classification. + - Added fixture-backed unit coverage for all five real Pi/Bedrock + request/response pairs, request block classification, response tool/text + extraction, usage extraction, stream events, raw payload gating, and hash + stability. + - Verified `bun run typecheck`, `bun run lint`, and `bun run test`. +- Implemented SQLite persistence, retention, and clear-data for spy capture: + - Added `src/spy/store.ts` with `openSpyStore`, spool batch ingestion, + request persistence, response completion, retention, clear-data, health + snapshots, and close lifecycle. + - Added request-only and response-only Bedrock normalization entrypoints while + preserving the paired fixture normalizer. + - Added typed HTTP event records and a schema v2 migration with + `normalized_block_fts` synchronization triggers. + - Persists pending and completed provider calls, HTTP metadata, normalized + blocks, usage records, stream events, optional raw payloads, dropped/error + counters, and service metadata. 
+ - Defers unmatched response spool files, deletes malformed spool files after + recording counters/metadata, and deletes valid spool files only after + successful commit. + - Converted `src/spy` tests to Bun's native test runner so `bun:sqlite` runs + directly, with the remaining unit tests still running under Vitest. + - Added fixture-backed store coverage for ingestion, pending-to-complete + updates, idempotency, raw payload gating, malformed/drop/error events, + retention with FTS cleanup, and clear-data. + - Verified `bun run typecheck`, `bun run lint`, and `bun run test`. +- Implemented the TypeScript spy web service, API, SSE, and static asset + serving: + - Added `src/spy/service.ts` and `src/bin/spy-service.ts` for the Bun HTTP + service runtime. + - Added environment-backed service config with V1 defaults for bind address, + port, SQLite path, spool path, retention, size caps, raw payload storage, + ingestion cadence, and retention cadence. + - Extended `src/spy/store.ts` with read-side APIs for paginated call + summaries, call details, stream event pages, FTS search, and previous-call + request diffs. + - Implemented same-origin JSON endpoints for health, call list/detail, diff, + stream events, search, and confirmed clear-data. + - Implemented SSE notifications for initial connection, health changes, call + changes, clear-data events, and keepalive comments. + - Added static asset serving with index fallback for browser routes and path + traversal rejection. + - Added fixture-backed Bun coverage for API behavior, pagination, raw payload + gating, clear-data confirmation, SSE updates, static serving, and bad input + handling. + - Verified `bun run typecheck`, `bun run lint`, and `bun run test`. +- Implemented the React desktop spy UI with virtualized timeline and call + inspector: + - Added a Vite + React + TypeScript app under `src/spy/ui` with Tailwind, + local shadcn-style primitives, lucide icons, and TanStack virtualization. 
+ - Added UI package scripts for dev, build, unit tests, and Playwright e2e + tests, plus TSX-aware lint/typecheck wiring and locked frontend + dependencies. + - Built the live-from-now conversation-analysis screen with explicit + historical range controls, search, status/model/block-kind filters, + virtualized provider-call timeline rows, SSE refresh, and call selection. + - Built the call-native inspector with request/response block rendering, + semantic highlighting, composition summaries, provider usage, request diff, + network metadata and headers, on-demand stream event loading, raw payload + availability, health/settings data, and confirmed clear-data. + - Added a fixture-backed UI test server and Playwright coverage for app load, + SSE live updates, call selection, inspector sections, historical loading, + search, stream events on demand, and clear-data confirmation. + - Reduced the service SSE keepalive interval so long-lived browser event + streams stay open under Bun's default idle timeout. + - Verified `bun run typecheck`, `bun run lint`, `bun run test`, + `bun run test:spy-ui:unit`, `bun run build:spy-ui`, and + `bun run test:spy-ui:e2e`. +- Wired browser spy provisioning and launcher: + - Added host build scripts for the bundled Bun spy service and static React UI + artifacts. + - Added the firewall `rootcell-spy.service`, persistent store/spool + directories, generated `/etc/agent-vm/spy.env`, and provision-time + enable/start or disable/stop behavior. + - Added a systemd generator so the firewall service is wanted only when the + generated spy env enables it, while the Nix unit remains installed in every + firewall VM. + - Replaced the old `rootcell spy` terminal/TUI launcher with a browser + launcher using an SSH local port forward and `--no-open`. + - Removed the old user-facing `rootcell spy --tui`, `--raw`, and + `--no-dedupe` CLI options from argument parsing. 
+ - Added provider/transport local port forwarding support shared by Lima and + AWS EC2. + - Verified `bun run typecheck`, `bun run lint`, `bun run build:spy`, + `bun run test`, and targeted firewall Nix service evaluation. + - Ran live provider integration against the already provisioned `default` + Lima VMs: enabled spy provisioning, confirmed firewall service health, + verified host-local tunnel fallback to port 6175, checked agent traffic + still routes through the firewall allowlist, closed the tunnel, and restored + the instance to `ROOTCELL_SPY_ENABLED=false`. +- Raised firewall disk/root volume defaults to 64 GiB and verified provider + contracts: + - Updated Lima firewall sizing to emit `disk: "64GiB"` while preserving the + agent `60GiB` disk default and existing CPU/RAM defaults. + - Updated the AWS EC2 firewall root EBS default to 64 GiB while preserving the + agent 60 GiB default and explicit root-volume environment overrides. + - Updated provider docs to describe the Lima and AWS firewall/agent disk + defaults and AWS override environment variables. + - Added unit coverage for AWS default Terraform variables, AWS override + behavior, Lima YAML disk rendering, provider docs, and the spy CLI flag + removal regression where `spy --tui --help` must still fail. + - Rebased onto `origin/main`, retained the merged Lima control-path restart + fix, and raised generated ProxyJump SSH `ConnectTimeout` to 15 seconds after + repeated integration failures showed Lima's vsock-backed SSH endpoint could + accept a connection but miss the previous 5 second banner deadline. + - Verified `bun run typecheck`, `bun run lint`, `bun run test`, + `bun run test:integration`, and `bun run test:integration:clean`. + - Confirmed fresh Lima integration VM sizing with `limactl list`: + `firewall-test` uses `64GiB` and `agent-test` remains `60GiB`. +- Removed the old terminal/TUI spy implementation: + - Trimmed `proxy/agent_spy.py` to the mitmproxy-facing Bedrock spool shim. 
+ - Deleted the Textual TUI module, terminal tail workflow, and Python tests for + that legacy path. + - Stopped provisioning `agent_spy_tui.py` and the legacy + `/run/agent-vm-spy` NDJSON directory. + - Updated host provisioning and integration assertions so `agent_spy.py` is + treated as a shipped shim, not an executable terminal tool. + - Updated main/proxy docs so `./rootcell spy` is only the browser launcher + over the SSH tunnel, with `--no-open` as the supported non-default flag. + - Added regression coverage that rejects the removed `--tui`, `--raw`, and + `--no-dedupe` paths and checks the firewall module no longer references the + removed TUI shim or runtime directory. + - Verified `python3 -m compileall proxy`, Python unit discovery, + `bun run typecheck`, `bun run lint`, `bun run test`, cleanup `rg` checks, + and the full `bun run test:integration` suite. +- Documented the browser spy: + - Added `src/spy/README.md` covering enablement, launch flow, data locations, + retention settings, clear-data behavior, privacy/security implications, + troubleshooting, API shape, provider/harness organization, and build/test + commands. + - Added brief links from the main README and proxy README. + - Verified `git diff --check`, stale legacy spy wording checks, and + `bun run lint`. +- Added runtime validation for browser API and SSE payloads: + - Added `src/spy/api-contracts.ts` as a browser-safe shared Zod contract + module for health, call pages, details, diffs, stream-event pages, + clear-data results, and SSE event payloads. + - Rewired the React UI types and API client to infer from shared schemas and + parse every successful JSON response instead of trusting generic + `fetchJson()` casts. + - Replaced unchecked browser SSE parsing with named event payload validation + for `hello`, `health`, `calls-changed`, and `cleared`, surfacing malformed + event data as a concise UI error. 
+ - Updated service tests to validate real endpoint and SSE output through the + shared contracts, and added UI API tests for invalid response payloads and + malformed SSE payloads. + - Verified `bun run typecheck`, `bun run lint`, `bun run test` with localhost + bind permission, `bun run test:spy-ui:unit`, and `git diff --check`. +- Completed the V1 request composition summary: + - Added shared `SpyRequestComposition` API contracts and included + `requestComposition` in `GET /api/calls/:id` detail responses. + - Computes request-only structural measures from persisted normalized blocks + and provider usage in TypeScript: total blocks/messages/chars/bytes, + per-section presence/counts/sizes, tool schema size, cache marker summary, + media summary, and provider-reported usage. + - Replaced the old mixed request/response block summary with a request-native + browser panel while keeping request and response block lists unchanged. + - Added fixture-backed store/service/UI/e2e coverage for simple prompts, + history, toolUse/toolResult, cache markers, raw-disabled, and raw-enabled + cases. + - Verified `bun run typecheck`, `bun run lint`, `bun run build:spy`, + `bun test src/spy --timeout 10000`, `bun run test`, + `bun run test:spy-ui:unit`, and `bun run test:spy-ui:e2e` with localhost + bind/browser permissions where required. +- Completed the V1 health/settings surface: + - Added explicit `service.enabled`, `store.droppedCaptureCount`, and + `store.lastIngestAt` fields to `/api/health` and SSE health payloads. + - Kept diagnostic counters and metadata available while moving required UI + state off metadata inference and onto typed API fields. + - Updated the browser health panel to show enabled state, DB size, spool size, + store/spool caps, retention days, dropped capture count, and last ingest + time without adding service/API versioning. + - Added store, service, shared-contract, UI API, and Playwright coverage for + the required health fields. 
+ - Verified `bun run typecheck`, `bun run lint`, `bun run build:spy`, + `bun test src/spy --timeout 10000`, `bun run test:spy-ui:unit`, and + `bun run test:spy-ui:e2e` with localhost bind/browser permissions where + required. +- Completed V1 timeline filtering: + - Locked the V1 event-type definition to provider-call operation + (`provider_call.operation`), leaving stream event types as inspector detail. + - Extended `/api/calls` and `/api/search` to apply time, provider, model, + operation, status, and normalized-text filters before pagination. + - Updated the browser timeline controls to send provider/model/operation/status + filters to the API instead of post-filtering paged results locally. + - Added store, service, UI API, and Playwright coverage for filtered calls, + filtered search, invalid provider/status query values, and search constrained + by the active operation filter. + - Verified `bun run typecheck`, `bun run lint`, `bun run build:spy`, + `bun run test`, `bun run test:spy-ui:unit`, `bun run test:spy-ui:e2e`, and + `git diff --check` with localhost bind/browser permissions where required. +- Completed V1 completion validation on 2026-05-23: + - Ran the full V1 baseline: `bun run typecheck`, `bun run lint`, + `python3 -m unittest discover -s proxy -p 'test_*.py'`, + `bun run build:spy`, `bun run test`, `bun run test:spy-ui:unit`, + `bun run test:spy-ui:e2e`, `bun run test:integration`, and + `bun run test:integration:clean`. + - Localhost-bound tests required the normal localhost/browser permissions; no + product, packaging, or test defects were found in the baseline. + - Ran the live smoke against the `default` Lima instance: enabled spy, + provisioned, launched `./rootcell spy --no-open`, verified `/api/health`, + captured real Pi/Bedrock `converse-stream` calls, inspected API detail, + diff, stream events, browser timeline/detail, and health, then cleared + data. 
+ - Fixed one live-smoke UI defect where a selected pending call detail could + remain stale after SSE updated the call summary to complete. The browser now + refetches selected detail when the selected summary status/content changes. + - Rebuilt and reprovisioned the patched spy UI, then confirmed the browser + inspector updated without reselecting the row and showed completed duration, + usage, request composition, repeated/changed diff labels, response blocks, + and health data. + - Restored `ROOTCELL_SPY_ENABLED=false`, reprovisioned, confirmed + `./rootcell spy --no-open` refuses to launch while disabled, the + `rootcell-spy.service` is inactive, the SQLite store is preserved, the spool + is empty, and a disabled-state Pi/Bedrock call writes no spool files. + +### V1 + +Build the Bedrock/Pi browser spy: + +- [x] Define spool event schema, normalized provider schema, and SQLite schema. +- [x] Capture sanitized real Pi/Bedrock fixtures to ground the schema and + adapter work. +- [x] Add initial AWS event-stream decoder. +- [x] Replace Python spy with minimal provider-gated spool shim. +- [x] Implement TypeScript Bedrock adapter on top of the captured fixtures. +- [x] Implement SQLite persistence, migrations, retention, and clear-data. +- [x] Implement TS web service, API, SSE, and static asset serving. +- [x] Implement React desktop UI with virtualized timeline and call inspector. +- [x] Wire `rootcell provision`, systemd service config, and `rootcell spy` + launcher/tunnel. +- [x] Remove old user-facing `rootcell spy --tui`, `--raw`, and `--no-dedupe` + CLI flags. +- [x] Raise firewall disk/root volume defaults to 64 GiB. +- [x] Remove old TUI/terminal spy implementation files, tests, and docs. +- [x] Add `src/spy/README.md` and brief links from main/proxy docs. +- [x] Complete the final V1 acceptance pass against the `default` Lima instance. 
+ - Ran the full baseline, live spy smoke, clear-data check, disabled-state + capture check, and restored `ROOTCELL_SPY_ENABLED=false`. + - Fixed the selected-call detail refresh defect found during the live smoke. + +### V1 Review Findings + +Review date: 2026-05-23. + +The implementation history above is complete, and the V1-specific validation +commands pass when local listener permissions are available. All V1 review +findings are complete: + +- [x] Add runtime validation for browser API and SSE payloads. + - Added shared Zod schemas for health, call pages, details, diffs, + stream-event pages, clear-data results, and SSE event payloads. + - Replaced unchecked client parsing such as generic `fetchJson()` casts and + `JSON.parse(event.data) as SpyServiceHealth` with schema-backed parsing. + - Kept browser-facing parsed values typed from the shared schema module + instead of importing server-only store/service types into the UI. + - Added UI/API unit coverage for invalid response payloads and malformed SSE + payloads, plus service coverage that parses real API/SSE output through the + shared contracts. +- [x] Complete the health/settings surface required by V1. + - Added explicit `service.enabled`, `store.droppedCaptureCount`, and + `store.lastIngestAt` fields to `/api/health` and SSE health payloads. + - Kept counters and metadata for diagnostics while making the UI use explicit + health fields instead of inferring V1 status from partial metadata. + - Updated the browser health panel to show enabled state, DB size, spool size, + store/spool caps, retention days, dropped capture count, and last ingest + time. + - Added store, service, UI API, and Playwright coverage that fails when + required health fields are absent. +- [x] Complete V1 timeline filtering. + - V1 requires filtering by time, provider/model, event type, and normalized + text. 
+ - V1 event type means the provider-call operation stored as + `provider_call.operation`; stream event types remain inspector detail, not + timeline filters. + - `/api/calls` and `/api/search` apply time, provider, model, operation, + status, and normalized-text filters consistently before pagination. + - The browser timeline exposes provider, operation, status, model, time range, + and normalized-text filters without client-side post-filtering of paged + results. +- [x] Expand the request composition summary to the exact V1 structural + measures. + - Added shared request composition Zod schemas and types, then exposed + `requestComposition` on call detail responses. + - Computes the exact V1 request measures from normalized blocks and usage in + the TypeScript store instead of deriving them ad hoc in React. + - The UI now shows a request-only composition panel with section presence, + message count, character/byte size by section, tool schema size, cache + marker summary, media summary, and provider-reported usage. + - Added fixture-backed tests for simple prompts, history, toolUse/toolResult, + cache markers, raw-disabled, raw-enabled, API contract validation, and the + browser composition surface. + +### V1 Bug Archive + +The completed V1 manual browser QA findings, prioritized bug handoff, evidence +notes, and follow-up verification baseline were moved to +`docs/bugfix/V1_bugs.md` so this plan can focus on V1.5 development. + +### V1.5 + +Add analysis depth: + +- Exact/estimated token counting for highlighted text, blocks, sections, and + whole requests. +- Provider-routed token-count backend; browser never calls LLM providers + directly. +- Per-block token provenance: `provider_reported`, `provider_counted`, + `estimated`, or `unavailable`. +- Automated compaction candidate detection: + - Pi-specific request patterns from fixtures. + - Generic fallback heuristics. + - Labels that distinguish Pi-specific candidates from heuristic candidates. 
+- Dedicated compaction investigation view. +- Visual regression/screenshot checks. + +### V2 + +Broaden scope: + +- Direct Anthropic provider adapter. +- OpenAI provider adapter. +- Additional harness analyzers for Claude Code, Codex, Cursor, and others. +- Multiple simultaneous conversation grouping. +- Rich token/time/cache charts and dashboards. +- Export/archive workflows if real use shows demand. +- Stronger auth/security model only if any non-local exposure is introduced. + +## Capacity Defaults + +Completed firewall disk defaults: + +- Lima firewall disk: 64 GiB. +- AWS firewall root volume: 64 GiB. +- Keep agent disk default unchanged. + +Current CPU/RAM defaults remain unchanged. Existing instances are not migrated +or resized automatically. + +Keep validating CPU/RAM with fixtures and live captures, then raise them only if +the service, SQLite ingestion, or UI serving needs more headroom. Disk is cheap; +do not optimize the service around artificially tiny capacity. + +## Security And Privacy + +This feature persists decrypted LLM prompts and responses. + +Security model: + +- Disabled by default. +- SSH tunnel only. +- No public web listener. +- Service binds firewall-local `127.0.0.1`. +- Header/query credential redaction is mandatory. +- No body secret redaction. +- Binary/media payloads summarized by default. +- Raw exact payload storage disabled by default. +- Spool is sensitive even when raw storage is disabled. +- Retention and manual clear are required. +- Capture failure must not affect agent traffic. +- EBS encryption / disk-at-rest posture should be reviewed for AWS, but v1 does + not add a separate application-level encryption layer. + +The docs must clearly state that if a prompt contains a secret, the spy store may +contain that secret until retention or manual clear removes it. + +## Failure Modes + +Define user-visible behavior and recovery for: + +- Spy disabled. +- Service not provisioned or stale. +- SSH tunnel failure. 
+- TypeScript service down. +- SQLite locked/corrupt. +- Spool full. +- Store retention limit reached. +- Retention cleanup failure. +- Mitmproxy shim error. +- Bedrock adapter parse failure. +- SSE disconnect. +- Dropped capture events. + +Hard invariant: spy failures must not block, slow significantly, or change +network traffic allow/deny decisions. + +## Testing + +V1 tests: + +- Python shim unit tests: + - enabled/disabled gating + - Bedrock provider candidate detection + - auth header/query redaction + - spool cap behavior + - no-write behavior when disabled/full +- TypeScript unit tests: + - Zod schema validation + - Bedrock request/response normalization + - AWS event-stream decoding + - usage/cache marker extraction + - repeated/new/changed hash classification + - Pi provenance classification from fixtures +- SQLite tests: + - migrations + - ingest idempotence/retry behavior + - request/response pairing + - retention by age and size + - clear-data baseline + - cascade deletes +- API tests: + - health + - pagination + - detail loading + - diff endpoint + - stream events endpoint + - search + - clear + - SSE notifications +- Playwright functional tests without screenshot baselines: + - app loads in local fixture mode + - receives SSE update + - selects a call + - opens inspector sections + - loads historical range + - searches + - loads stream events on demand + - clears data +- Integration tests: + - full provider contract flow with Lima user-v2 VMs + - clean provisioning cycle from deleted integration VMs and network state + - rootcell-managed VM stop/start restart path + - Lima control-path availability after VM restarts + - host SSH to firewall and proxied agent aliases + - firewall service and spy asset provisioning checks + - DNS, HTTPS, request-regex, and SSH policy enforcement + - CLI smoke test against a fresh named instance + - Lima firewall disk default `64GiB` while agent remains `60GiB` + - AWS Terraform variables render firewall root volume 
`64` by default + - AWS root-volume environment overrides still win + +Fixture strategy: + +- Initial sanitized real Pi/Bedrock fixture capture is complete for + `us.anthropic.claude-sonnet-4-6`. +- Add handcrafted minimal fixtures only as supplements for targeted edge cases. +- Add more sanitized real captures as the Bedrock adapter, shim, and UI expose + concrete gaps. +- Cover normal calls, streaming, tool calls/results, cache markers, large + history, error responses, disabled capture, raw disabled, and raw enabled. + +Completed validation for the 64 GiB firewall default: + +- `bun run typecheck` +- `bun run lint` +- `bun run test` +- `bun run test:integration` +- `bun run test:integration:clean` + +## Documentation + +The detailed operator/developer doc is `src/spy/README.md`. + +It is briefly referenced from `README.md` and `proxy/README.md`. + +The doc covers: + +- Enabling spy in the instance `.env`. +- Running `./rootcell provision`. +- Launching `./rootcell spy`. +- Data locations. +- Retention settings. +- Disk sizing defaults. +- Privacy/security implications. +- Clear-data behavior. +- Service health and troubleshooting. +- Removed TUI/terminal flags. +- How provider and harness adapters are organized. + +## Non-Goals + +- No LiteLLM or request translation proxy. +- No old TUI or old NDJSON compatibility. +- No public web exposure. +- No auth system in v1. +- No multi-provider support in v1. +- No multi-instance UI in v1. +- No multi-conversation grouping in v1. +- No automated compaction detection in v1. +- No highlighted-text token counting in v1. +- No local token estimates in v1. +- No mobile optimization. +- No keyboard shortcut parity with the TUI. +- No annotations/bookmarks/labels. +- No import/export. +- No in-UI settings editing. +- No body secret redaction beyond auth headers/query credentials. + +## Post-V1 Technical Validations + +No V1-specific open questions or investigations remain. 
+ +- Continue measuring firewall CPU/RAM under larger Bedrock/Pi captures before + changing CPU/RAM defaults. +- Revisit true mitmproxy chunk arrival timing only if a later analysis view + needs wall-clock stream timing beyond decoded logical stream events. diff --git a/bun.lock b/bun.lock index 39df676..a71fbbd 100644 --- a/bun.lock +++ b/bun.lock @@ -10,14 +10,25 @@ "@aws-sdk/client-secrets-manager": "^3.1050.0", "@aws-sdk/client-sts": "^3.1050.0", "@aws-sdk/credential-providers": "^3.1050.0", + "@tanstack/react-virtual": "^3.13.25", + "clsx": "^2.1.1", + "lucide-react": "^1.16.0", + "react": "^19.2.6", + "react-dom": "^19.2.6", + "tailwind-merge": "^3.6.0", "yargs": "18.0.0", "zod": "^4.4.3", }, "devDependencies": { "@eslint/js": "10.0.1", + "@playwright/test": "^1.60.0", + "@tailwindcss/vite": "^4.3.0", "@types/bun": "1.3.14", "@types/node": "25.7.0", + "@types/react": "^19.2.15", + "@types/react-dom": "^19.2.3", "@types/yargs": "17.0.35", + "@vitejs/plugin-react": "^6.0.2", "eslint": "10.3.0", "jiti": "2.7.0", "typescript": "5.9.3", @@ -135,14 +146,24 @@ "@humanwhocodes/retry": ["@humanwhocodes/retry@0.4.3", "", {}, "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ=="], + "@jridgewell/gen-mapping": ["@jridgewell/gen-mapping@0.3.13", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.0", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA=="], + + "@jridgewell/remapping": ["@jridgewell/remapping@2.3.5", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/trace-mapping": "^0.3.24" } }, "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ=="], + + "@jridgewell/resolve-uri": ["@jridgewell/resolve-uri@3.1.2", "", {}, "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw=="], + "@jridgewell/sourcemap-codec": 
["@jridgewell/sourcemap-codec@1.5.5", "", {}, "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og=="], + "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="], + "@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@1.1.4", "", { "dependencies": { "@tybys/wasm-util": "^0.10.1" }, "peerDependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1" } }, "sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow=="], "@nodable/entities": ["@nodable/entities@2.1.0", "", {}, "sha512-nyT7T3nbMyBI/lvr6L5TyWbFJAI9FTgVRakNoBqCD+PmID8DzFrrNdLLtHMwMszOtqZa8PAOV24ZqDnQrhQINA=="], "@oxc-project/types": ["@oxc-project/types@0.130.0", "", {}, "sha512-ibD2usx9JRu7f5pu2tMKMI4cpA4NgXJQoYRP4pQ7Pxmn1l6k/53qWtQWZayhYy3X4QZkt90Ot+mJEaeXouio6Q=="], + "@playwright/test": ["@playwright/test@1.60.0", "", { "dependencies": { "playwright": "1.60.0" }, "bin": { "playwright": "cli.js" } }, "sha512-O71yZIbAh/PxDMNGns37GHBIfrVkEVyn+AXyIa5dOTfb4/xNvRWV+Vv/NMbNCtODB/pO7vLlF2OTmMVLhmr7Ag=="], + "@rolldown/binding-android-arm64": ["@rolldown/binding-android-arm64@1.0.1", "", { "os": "android", "cpu": "arm64" }, "sha512-fJI3I0r3C3Oj/zdBCpaCmBRZYf07xpaq4yCfDDoSFm+beWNzbIl26puW8RraUdugoJw/95zerNOn6jasAhzSmg=="], "@rolldown/binding-darwin-arm64": ["@rolldown/binding-darwin-arm64@1.0.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-cKnAhWEsV7TPcA/5EAteDp6KcJZBQ2G+BqE7zayMMi7kMvwRsbv7WT9aOnn0WNl4SKEIf43vjS31iUPu80nzXg=="], @@ -195,6 +216,40 @@ "@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="], + "@tailwindcss/node": ["@tailwindcss/node@4.3.0", "", { "dependencies": { "@jridgewell/remapping": 
"^2.3.5", "enhanced-resolve": "^5.21.0", "jiti": "^2.6.1", "lightningcss": "1.32.0", "magic-string": "^0.30.21", "source-map-js": "^1.2.1", "tailwindcss": "4.3.0" } }, "sha512-aFb4gUhFOgdh9AXo4IzBEOzBkkAxm9VigwDJnMIYv3lcfXCJVesNfbEaBl4BNgVRyid92AmdviqwBUBRKSeY3g=="], + + "@tailwindcss/oxide": ["@tailwindcss/oxide@4.3.0", "", { "optionalDependencies": { "@tailwindcss/oxide-android-arm64": "4.3.0", "@tailwindcss/oxide-darwin-arm64": "4.3.0", "@tailwindcss/oxide-darwin-x64": "4.3.0", "@tailwindcss/oxide-freebsd-x64": "4.3.0", "@tailwindcss/oxide-linux-arm-gnueabihf": "4.3.0", "@tailwindcss/oxide-linux-arm64-gnu": "4.3.0", "@tailwindcss/oxide-linux-arm64-musl": "4.3.0", "@tailwindcss/oxide-linux-x64-gnu": "4.3.0", "@tailwindcss/oxide-linux-x64-musl": "4.3.0", "@tailwindcss/oxide-wasm32-wasi": "4.3.0", "@tailwindcss/oxide-win32-arm64-msvc": "4.3.0", "@tailwindcss/oxide-win32-x64-msvc": "4.3.0" } }, "sha512-F7HZGBeN9I0/AuuJS5PwcD8xayx5ri5GhjYUDBEVYUkexyA/giwbDNjRVrxSezE3T250OU2K/wp/ltWx3UOefg=="], + + "@tailwindcss/oxide-android-arm64": ["@tailwindcss/oxide-android-arm64@4.3.0", "", { "os": "android", "cpu": "arm64" }, "sha512-TJPiq67tKlLuObP6RkwvVGDoxCMBVtDgKkLfa/uyj7/FyxvQwHS+UOnVrXXgbEsfUaMgiVvC4KbJnRr26ho4Ng=="], + + "@tailwindcss/oxide-darwin-arm64": ["@tailwindcss/oxide-darwin-arm64@4.3.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-oMN/WZRb+SO37BmUElEgeEWuU8E/HXRkiODxJxLe1UTHVXLrdVSgfaJV7pSlhRGMSOiXLuxTIjfsF3wYvz8cgQ=="], + + "@tailwindcss/oxide-darwin-x64": ["@tailwindcss/oxide-darwin-x64@4.3.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-N6CUmu4a6bKVADfw77p+iw6Yd9Q3OBhe0veaDX+QazfuVYlQsHfDgxBrsjQ/IW+zywL8mTrNd0SdJT/zgtvMdA=="], + + "@tailwindcss/oxide-freebsd-x64": ["@tailwindcss/oxide-freebsd-x64@4.3.0", "", { "os": "freebsd", "cpu": "x64" }, "sha512-zDL5hBkQdH5C6MpqbK3gQAgP80tsMwSI26vjOzjJtNCMUo0lFgOItzHKBIupOZNQxt3ouPH7RPhvNhiTfCe5CQ=="], + + "@tailwindcss/oxide-linux-arm-gnueabihf": ["@tailwindcss/oxide-linux-arm-gnueabihf@4.3.0", "", { "os": 
"linux", "cpu": "arm" }, "sha512-R06HdNi7A7OEoMsf6d4tjZ71RCWnZQPHj2mnotSFURjNLdBC+cIgXQ7l81CqeoiQftjf6OOblxXMInMgN2VzMA=="], + + "@tailwindcss/oxide-linux-arm64-gnu": ["@tailwindcss/oxide-linux-arm64-gnu@4.3.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-qTJHELX8jetjhRQHCLilkVLmybpzNQAtaI/gaoVoidn/ufbNDbAo8KlK2J+yPoc8wQxvDxCmh/5lr8nC1+lTbg=="], + + "@tailwindcss/oxide-linux-arm64-musl": ["@tailwindcss/oxide-linux-arm64-musl@4.3.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-Z6sukiQsngnWO+l39X4pPbiWT81IC+PLKF+PHxIlyZbGNb9MODfYlXEVlFvej5BOZInWX01kVyzeLvHsXhfczQ=="], + + "@tailwindcss/oxide-linux-x64-gnu": ["@tailwindcss/oxide-linux-x64-gnu@4.3.0", "", { "os": "linux", "cpu": "x64" }, "sha512-DRNdQRpSGzRGfARVuVkxvM8Q12nh19l4BF/G7zGA1oe+9wcC6saFBHTISrpIcKzhiXtSrlSrluCfvMuledoCTQ=="], + + "@tailwindcss/oxide-linux-x64-musl": ["@tailwindcss/oxide-linux-x64-musl@4.3.0", "", { "os": "linux", "cpu": "x64" }, "sha512-Z0IADbDo8bh6I7h2IQMx601AdXBLfFpEdUotft86evd/8ZPflZe9COPO8Q1vw+pfLWIUo9zN/JGZvwuAJqduqg=="], + + "@tailwindcss/oxide-wasm32-wasi": ["@tailwindcss/oxide-wasm32-wasi@4.3.0", "", { "dependencies": { "@emnapi/core": "^1.10.0", "@emnapi/runtime": "^1.10.0", "@emnapi/wasi-threads": "^1.2.1", "@napi-rs/wasm-runtime": "^1.1.4", "@tybys/wasm-util": "^0.10.1", "tslib": "^2.8.1" }, "cpu": "none" }, "sha512-HNZGOUxEmElksYR7S6sC5jTeNGpobAsy9u7Gu0AskJ8/20FR9GqebUyB+HBcU/ax6BHuiuJi+Oda4B+YX6H1yA=="], + + "@tailwindcss/oxide-win32-arm64-msvc": ["@tailwindcss/oxide-win32-arm64-msvc@4.3.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-Pe+RPVTi1T+qymuuRpcdvwSVZjnll/f7n8gBxMMh3xLTctMDKqpdfGimbMyioqtLhUYZxdJ9wGNhV7MKHvgZsQ=="], + + "@tailwindcss/oxide-win32-x64-msvc": ["@tailwindcss/oxide-win32-x64-msvc@4.3.0", "", { "os": "win32", "cpu": "x64" }, "sha512-Mvrf2kXW/yeW/OTezZlCGOirXRcUuLIBx/5Y12BaPM7wJoryG6dfS/NJL8aBPqtTEx/Vm4T4vKzFUcKDT+TKUA=="], + + "@tailwindcss/vite": ["@tailwindcss/vite@4.3.0", "", { "dependencies": { "@tailwindcss/node": "4.3.0", "@tailwindcss/oxide": 
"4.3.0", "tailwindcss": "4.3.0" }, "peerDependencies": { "vite": "^5.2.0 || ^6 || ^7 || ^8" } }, "sha512-t6J3OrB5Fc0ExuhohouH0fWUGMYL6PTLhW+E7zIk/pdbnJARZDCwjBznFnkh5ynRnIRSI4YjtTH0t6USjJISrw=="], + + "@tanstack/react-virtual": ["@tanstack/react-virtual@3.13.25", "", { "dependencies": { "@tanstack/virtual-core": "3.15.0" }, "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-bmNoqMu6gcAW9JGrKVB0Q1tN1i5RONZF8r1fW0bbE4Oyf3DwEGnzzQJ2OW+Ozg1P4s8PyugkHg2ULZoFQN+cqw=="], + + "@tanstack/virtual-core": ["@tanstack/virtual-core@3.15.0", "", {}, "sha512-0AwPGx0I8QxPYjAxShT/+z+ZOe9u8mW5rsXvivCTjRfRmz9a43+3mRyi4wwlyoUqOC56q/jatKa0Bh9M99BEHQ=="], + "@tybys/wasm-util": ["@tybys/wasm-util@0.10.2", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-RoBvJ2X0wuKlWFIjrwffGw1IqZHKQqzIchKaadZZfnNpsAYp2mM0h36JtPCjNDAHGgYez/15uMBpfGwchhiMgg=="], "@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="], @@ -211,6 +266,10 @@ "@types/node": ["@types/node@25.7.0", "", { "dependencies": { "undici-types": "~7.21.0" } }, "sha512-z+pdZyxE+RTQE9AcboAZCb4otwcrvgHD+GlBpPgn0emDVt0ohrTMhAwlr2Wd9nZ+nihhYFxO2pThz3C5qSu2Eg=="], + "@types/react": ["@types/react@19.2.15", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-eRwcGNHve+E8qtEQSSRl6urh+rFop4v8gm6O8rGv25CodbvFdLjA1vVQ1KkiFE0w0UPOnb8tDiFKL5lp0rtY5Q=="], + + "@types/react-dom": ["@types/react-dom@19.2.3", "", { "peerDependencies": { "@types/react": "^19.2.0" } }, "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ=="], + "@types/yargs": ["@types/yargs@17.0.35", "", { "dependencies": { "@types/yargs-parser": "*" } }, "sha512-qUHkeCyQFxMXg79wQfTtfndEC+N9ZZg76HJftDJp+qH2tV7Gj4OJi7l+PiWwJ+pWtW8GwSmqsDj/oymhrTWXjg=="], "@types/yargs-parser": ["@types/yargs-parser@21.0.3", "", {}, 
"sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ=="], @@ -235,6 +294,8 @@ "@typescript-eslint/visitor-keys": ["@typescript-eslint/visitor-keys@8.59.3", "", { "dependencies": { "@typescript-eslint/types": "8.59.3", "eslint-visitor-keys": "^5.0.0" } }, "sha512-f1UQF7ggd42YiwI5wGrRaPsa+P0CINBlrkLPmGfpq/u/I/oVtecoEIfFR9ag/oa1sLOsRNZ6xehf6qMZhQGBDg=="], + "@vitejs/plugin-react": ["@vitejs/plugin-react@6.0.2", "", { "dependencies": { "@rolldown/pluginutils": "^1.0.0" }, "peerDependencies": { "@rolldown/plugin-babel": "^0.1.7 || ^0.2.0", "babel-plugin-react-compiler": "^1.0.0", "vite": "^8.0.0" }, "optionalPeers": ["@rolldown/plugin-babel", "babel-plugin-react-compiler"] }, "sha512-DlSMqo4WhThw4vB8Mpn0Woe9J+Jfq1geJ61AKW0QEgLzGMNwtIMdxbDUzLxcun8W7NbJO0e2Jg/Nxm3cCSVzzg=="], + "@vitest/expect": ["@vitest/expect@4.1.6", "", { "dependencies": { "@standard-schema/spec": "^1.1.0", "@types/chai": "^5.2.2", "@vitest/spy": "4.1.6", "@vitest/utils": "4.1.6", "chai": "^6.2.2", "tinyrainbow": "^3.1.0" } }, "sha512-7EHDquPthALSV0jhhjgEW8FXaviMx7rSqu8W6oqCoAuOhKov814P99QDV1pxMA3QPv21YudvJngIhjrNI4opLg=="], "@vitest/mocker": ["@vitest/mocker@4.1.6", "", { "dependencies": { "@vitest/spy": "4.1.6", "estree-walker": "^3.0.3", "magic-string": "^0.30.21" }, "peerDependencies": { "msw": "^2.4.9", "vite": "^6.0.0 || ^7.0.0 || ^8.0.0" }, "optionalPeers": ["msw", "vite"] }, "sha512-MCFc63czMjEInOlcY2cpQCvCN+KgbAn+60xu9cMgP4sKaLC5JNAKw7JH8QdAnoAC88hW1IiSNZ+GgVXlN1UcMQ=="], @@ -273,10 +334,14 @@ "cliui": ["cliui@9.0.1", "", { "dependencies": { "string-width": "^7.2.0", "strip-ansi": "^7.1.0", "wrap-ansi": "^9.0.0" } }, "sha512-k7ndgKhwoQveBL+/1tqGJYNz097I7WOvwbmmU2AR5+magtbjPWQTS1C5vzGkBC8Ym8UWRzfKUzUUqFLypY4Q+w=="], + "clsx": ["clsx@2.1.1", "", {}, "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA=="], + "convert-source-map": ["convert-source-map@2.0.0", "", {}, 
"sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg=="], "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="], + "csstype": ["csstype@3.2.3", "", {}, "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="], + "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], "deep-is": ["deep-is@0.1.4", "", {}, "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="], @@ -285,6 +350,8 @@ "emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="], + "enhanced-resolve": ["enhanced-resolve@5.22.0", "", { "dependencies": { "graceful-fs": "^4.2.4", "tapable": "^2.3.3" } }, "sha512-xYcDWrpELkFzz9SpZ3PlI6Eu6eD93Yf0WLDRxikGhWJ3MAir2SNZTIVCVZqZ/NUyx8AdMc2gT9C0gPiw18kG+A=="], + "es-module-lexer": ["es-module-lexer@2.1.0", "", {}, "sha512-n27zTYMjYu1aj4MjCWzSP7G9r75utsaoc8m61weK+W8JMBGGQybd43GstCXZ3WNmSFtGT9wi59qQTW6mhTR5LQ=="], "escalade": ["escalade@3.2.0", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="], @@ -331,7 +398,7 @@ "flatted": ["flatted@3.4.2", "", {}, "sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA=="], - "fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="], + "fsevents": ["fsevents@2.3.2", "", { "os": "darwin" }, "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA=="], "get-caller-file": ["get-caller-file@2.0.5", "", {}, 
"sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg=="], @@ -339,6 +406,8 @@ "glob-parent": ["glob-parent@6.0.2", "", { "dependencies": { "is-glob": "^4.0.3" } }, "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A=="], + "graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="], + "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], "imurmurhash": ["imurmurhash@0.1.4", "", {}, "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA=="], @@ -387,6 +456,8 @@ "locate-path": ["locate-path@6.0.0", "", { "dependencies": { "p-locate": "^5.0.0" } }, "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw=="], + "lucide-react": ["lucide-react@1.16.0", "", { "peerDependencies": { "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-dYwyPzb4MEKpGUmNYk3WKWPnMrHs3FKM+q94kAnJrcDIqqn1hq2xY8scaS2ovsOCM5D51ey2gaRG3PBb1vgoYQ=="], + "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], "minimatch": ["minimatch@10.2.5", "", { "dependencies": { "brace-expansion": "^5.0.5" } }, "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg=="], @@ -417,14 +488,24 @@ "picomatch": ["picomatch@4.0.4", "", {}, "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A=="], + "playwright": ["playwright@1.60.0", "", { "dependencies": { "playwright-core": "1.60.0" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, 
"sha512-hheHdokM8cdqCb0lcE3s+zT4t4W+vvjpGxsZlDnikarzx8tSzMebh3UiFtgqwFwnTnjYQcsyMF8ei2mCO/tpeA=="], + + "playwright-core": ["playwright-core@1.60.0", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-9bW6zvX/m0lEbgTKJ6YppOKx8H3VOPBMOCFh2irXFOT4BbHgrx5hPjwJYLT40Lu+4qtD36qKc/Hn56StUW57IA=="], + "postcss": ["postcss@8.5.14", "", { "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", "source-map-js": "^1.2.1" } }, "sha512-SoSL4+OSEtR99LHFZQiJLkT59C5B1amGO1NzTwj7TT1qCUgUO6hxOvzkOYxD+vMrXBM3XJIKzokoERdqQq/Zmg=="], "prelude-ls": ["prelude-ls@1.2.1", "", {}, "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g=="], "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], + "react": ["react@19.2.6", "", {}, "sha512-sfWGGfavi0xr8Pg0sVsyHMAOziVYKgPLNrS7ig+ivMNb3wbCBw3KxtflsGBAwD3gYQlE/AEZsTLgToRrSCjb0Q=="], + + "react-dom": ["react-dom@19.2.6", "", { "dependencies": { "scheduler": "^0.27.0" }, "peerDependencies": { "react": "^19.2.6" } }, "sha512-0prMI+hvBbPjsWnxDLxlCGyM8PN6UuWjEUCYmZhO67xIV9Xasa/r/vDnq+Xyq4Lo27g8QSbO5YzARu0D1Sps3g=="], + "rolldown": ["rolldown@1.0.1", "", { "dependencies": { "@oxc-project/types": "=0.130.0", "@rolldown/pluginutils": "^1.0.0" }, "optionalDependencies": { "@rolldown/binding-android-arm64": "1.0.1", "@rolldown/binding-darwin-arm64": "1.0.1", "@rolldown/binding-darwin-x64": "1.0.1", "@rolldown/binding-freebsd-x64": "1.0.1", "@rolldown/binding-linux-arm-gnueabihf": "1.0.1", "@rolldown/binding-linux-arm64-gnu": "1.0.1", "@rolldown/binding-linux-arm64-musl": "1.0.1", "@rolldown/binding-linux-ppc64-gnu": "1.0.1", "@rolldown/binding-linux-s390x-gnu": "1.0.1", "@rolldown/binding-linux-x64-gnu": "1.0.1", "@rolldown/binding-linux-x64-musl": "1.0.1", "@rolldown/binding-openharmony-arm64": "1.0.1", "@rolldown/binding-wasm32-wasi": "1.0.1", "@rolldown/binding-win32-arm64-msvc": "1.0.1", 
"@rolldown/binding-win32-x64-msvc": "1.0.1" }, "bin": { "rolldown": "bin/cli.mjs" } }, "sha512-X0KQHljNnEkWNqqiz9zJrGunh1B0HgOxLXvnFpCOcadzcy5qohZ3tqMEUg00vncoRovXuK3ZqCT9KnnKzoInFQ=="], + "scheduler": ["scheduler@0.27.0", "", {}, "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q=="], + "semver": ["semver@7.8.0", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA=="], "shebang-command": ["shebang-command@2.0.0", "", { "dependencies": { "shebang-regex": "^3.0.0" } }, "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA=="], @@ -445,6 +526,12 @@ "strnum": ["strnum@2.3.0", "", {}, "sha512-ums3KNd42PGyx5xaoVTO1mjU1bH3NpY4vsrVlnv9PNGqQj8wd7rJ6nEypLrJ7z5vxK5RP0yMLo6J/Gsm62DI5Q=="], + "tailwind-merge": ["tailwind-merge@3.6.0", "", {}, "sha512-uxL7qAVQriqRQPAyK3pj66VqskWqoZ37PW94jwOTwNfq/z9oyu1V+eqrZqtR2+fCiXdYOZe/Modt8GtvqNzu+w=="], + + "tailwindcss": ["tailwindcss@4.3.0", "", {}, "sha512-y6nxMGB1nMW9R6k96e5gdIFzcfL/gTJRNaqGes1YvkLnPVXzWgbqFF2yLC0T8G774n24cx3Pe8XrKoniCOAH+Q=="], + + "tapable": ["tapable@2.3.3", "", {}, "sha512-uxc/zpqFg6x7C8vOE7lh6Lbda8eEL9zmVm/PLeTPBRhh1xCgdWaQ+J1CUieGpIfm2HdtsUpRv+HshiasBMcc6A=="], + "tinybench": ["tinybench@2.9.0", "", {}, "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg=="], "tinyexec": ["tinyexec@1.1.2", "", {}, "sha512-dAqSqE/RabpBKI8+h26GfLq6Vb3JVXs30XYQjdMjaj/c2tS8IYYMbIzP599KtRj7c57/wYApb3QjgRgXmrCukA=="], @@ -493,6 +580,20 @@ "@eslint-community/eslint-utils/eslint-visitor-keys": ["eslint-visitor-keys@3.4.3", "", {}, "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag=="], + "@tailwindcss/oxide-wasm32-wasi/@emnapi/core": ["@emnapi/core@1.10.0", "", { "dependencies": { "@emnapi/wasi-threads": "1.2.1", "tslib": "^2.4.0" }, "bundled": true }, 
"sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw=="], + + "@tailwindcss/oxide-wasm32-wasi/@emnapi/runtime": ["@emnapi/runtime@1.10.0", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA=="], + + "@tailwindcss/oxide-wasm32-wasi/@emnapi/wasi-threads": ["@emnapi/wasi-threads@1.2.1", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w=="], + + "@tailwindcss/oxide-wasm32-wasi/@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@1.1.4", "", { "dependencies": { "@tybys/wasm-util": "^0.10.1" }, "peerDependencies": { "@emnapi/core": "^1.7.1", "@emnapi/runtime": "^1.7.1" }, "bundled": true }, "sha512-3NQNNgA1YSlJb/kMH1ildASP9HW7/7kYnRI2szWJaofaS1hWmbGI4H+d3+22aGzXXN9IJ+n+GiFVcGipJP18ow=="], + + "@tailwindcss/oxide-wasm32-wasi/@tybys/wasm-util": ["@tybys/wasm-util@0.10.2", "", { "dependencies": { "tslib": "^2.4.0" }, "bundled": true }, "sha512-RoBvJ2X0wuKlWFIjrwffGw1IqZHKQqzIchKaadZZfnNpsAYp2mM0h36JtPCjNDAHGgYez/15uMBpfGwchhiMgg=="], + + "@tailwindcss/oxide-wasm32-wasi/tslib": ["tslib@2.8.1", "", { "bundled": true }, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], + "@typescript-eslint/eslint-plugin/ignore": ["ignore@7.0.5", "", {}, "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg=="], + + "vite/fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="], } } diff --git a/docs/bugfix/SPY-QA-01-RCA.md b/docs/bugfix/SPY-QA-01-RCA.md new file mode 100644 index 0000000..912c9d3 --- /dev/null +++ b/docs/bugfix/SPY-QA-01-RCA.md @@ -0,0 +1,203 @@ +# SPY-QA-01 RCA: Clipped Spy Layout Scroll Containers + +## Scope + +This RCA covers the 
highest-priority open spy bug from `PLAN.md`: `SPY-QA-01`. + +`PLAN.md` lines 830-833 describe the failure as the two-column page layout letting +`main` hide overflow instead of giving the timeline and inspector their own +reachable scroll containers. + +## Reproduction Used + +- Built the current UI with `bun run build:spy-ui`. +- Started the fixture-backed spy UI service with: + `bun run src/spy/ui/test-server.ts --port 4681 --static dist/spy-ui` +- Opened `http://127.0.0.1:4681/?since=0` in the in-app browser. +- Measured layout geometry in a 1280 x 720 viewport after fixture calls loaded. + +The fixture server loaded 5 provider-call rows, which was enough to reproduce the +layout bug. This is stronger than the manual QA condition in one respect: the +bug does not require a long Today view or a large live call. + +## Proof + +Runtime layout measurements from the browser: + +```json +{ + "viewport": { "width": 1280, "height": 720 }, + "main": { + "height": 720, + "clientHeight": 720, + "scrollHeight": 912, + "overflowY": "hidden", + "scrollTop": 0 + }, + "section": { + "top": 64, + "bottom": 720, + "height": 656, + "clientHeight": 656, + "scrollHeight": 848 + }, + "timeline": { + "top": 265, + "bottom": 912, + "height": 647, + "clientHeight": 647, + "scrollHeight": 647, + "maxScrollTop": 0, + "overflowY": "auto" + }, + "footer": { + "top": 855, + "bottom": 912, + "height": 57 + } +} +``` + +Key observations: + +- `main` is exactly viewport height (`720`) but its content height is `912`. +- `main` has `overflowY: hidden`, so the extra `192 px` is clipped with no page + scrollbar. +- The timeline is laid out from y=`265` to y=`912`, so its bottom `192 px` sit + below the visible viewport. +- The timeline reports `clientHeight == scrollHeight == 647`, so it believes it + has no internal scroll range (`maxScrollTop: 0`). Scrolling over it cannot + reveal the clipped rows or footer. 
+- Row 3 ends at y=`733`, row 4 spans y=`745` to y=`851`, and the footer spans + y=`855` to y=`912`; all are partly or wholly below the visible viewport bottom + at y=`720`. + +Inspector proof after scrolling the inspector to its maximum scroll position: + +```json +{ + "viewport": { "height": 720 }, + "aside": { + "top": 64, + "bottom": 912, + "height": 848, + "clientHeight": 848, + "scrollHeight": 3448, + "scrollTop": 2600, + "maxScrollTop": 2600 + }, + "lowerSectionsAtMaxScroll": { + "Stream Events": { "top": 723, "bottom": 767 }, + "Raw Payloads": { "top": 785, "bottom": 829 }, + "Health": { "top": 847, "bottom": 891 } + } +} +``` + +At the inspector's maximum scroll position, `Stream Events`, `Raw Payloads`, and +`Health` are still below y=`720`. The aside's own `clientHeight` is `848`, but +only y=`64` through y=`720` is visible. The browser is aligning the bottom of +the inspector content to y=`912`, not to the visible viewport bottom. + +## Source Evidence + +Relevant current code: + +- `src/spy/ui/src/App.tsx:375` sets the page root to + `h-screen min-h-[720px] overflow-hidden`. +- `src/spy/ui/src/App.tsx:408` sets the content grid to + `h-[calc(100vh-4rem)] min-h-[656px]`. +- `src/spy/ui/src/App.tsx:409` creates the timeline column as a flex column + without `min-h-0`. +- `src/spy/ui/src/App.tsx:641` makes the timeline itself `flex-1 overflow-auto`, + but the parent has already expanded past the visible section. +- `src/spy/ui/src/App.tsx:763` makes the inspector `overflow-auto`, but it is + also allowed to size taller than the visible grid track. + +## Root Cause + +The scroll containers are present, but their ancestors are not allowed to shrink +inside the fixed viewport grid. + +The combination of viewport-fixed heights, minimum heights, `main` clipping, and +missing `min-h-0` on the grid/flex children causes the timeline column and the +inspector to resolve to content-driven heights. They become `848 px` tall in a +`656 px` visible content area. 
Because the scroll containers themselves believe +their full `848 px` boxes are visible, they compute the wrong scroll range. + +This is why the bug is not merely cosmetic: + +- Timeline rows and the footer can exist below the viewport while the timeline + has no internal scroll range. +- Lower inspector sections can remain below the viewport even when the inspector + is scrolled to its maximum. +- `main` hides the layout overflow, so the user gets neither a page scrollbar + nor a correct nested scrollbar. + +## Proposed Fix + +Rework the app shell so the header and body are explicit viewport rows, and make +the body grid and both columns shrinkable: + +- Use a root layout equivalent to `h-screen grid grid-rows-[4rem_minmax(0,1fr)]` + with `overflow-hidden`. +- Remove the content area's viewport-derived `min-h` that can exceed the visible + viewport. +- Add `min-h-0` to the content grid, timeline column, timeline scroll region, + and inspector. +- Keep timeline and inspector as the only vertical scroll owners for their + respective columns. + +Expected proof after the fix: + +- The content grid bottom should equal the viewport bottom. +- Timeline and inspector bottoms should be `720` in the same viewport, not `912`. +- Timeline `clientHeight` should reflect the visible space, and rows beyond that + should be reachable through `timeline.scrollTop`. +- At inspector max scroll, lower sections such as `Health` should be visible + within the viewport. + +## Fix Status + +Implemented. + +Changed `src/spy/ui/src/App.tsx` so the spy UI shell uses an explicit header row +and shrinkable body row, with `min-h-0` on the body grid, timeline column, +timeline scroll region, and inspector. Added Playwright coverage in +`src/spy/ui/e2e/spy-ui.playwright.ts` for the viewport clipping regression. 
+ +Post-fix browser verification at 1280 x 720: + +```json +{ + "main": { "clientHeight": 720, "scrollHeight": 720 }, + "timeline": { + "bottom": 720, + "clientHeight": 455, + "scrollHeight": 647, + "maxScrollTop": 192 + }, + "aside": { + "bottom": 720, + "clientHeight": 656, + "scrollHeight": 3448, + "maxScrollTop": 2792 + }, + "afterTimelineScroll": { + "scrollTop": 192, + "lastRow": { "top": 553, "bottom": 659 } + }, + "afterInspectorScroll": { + "scrollTop": 2792, + "health": { "top": 655, "bottom": 699 } + } +} +``` + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run build:spy-ui` +- `bun run test:spy-ui:unit` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-02-RCA.md b/docs/bugfix/SPY-QA-02-RCA.md new file mode 100644 index 0000000..1d49ac6 --- /dev/null +++ b/docs/bugfix/SPY-QA-02-RCA.md @@ -0,0 +1,160 @@ +# SPY-QA-02 RCA: Hidden Top-Level Scrolling + +## Scope + +This RCA covers what was the highest-priority open spy item in `PLAN.md`: +`SPY-QA-02`. + +`PLAN.md` describes the failure as lower inspector focus or panel opening setting +`main.scrollTop` even though `main` uses `overflow-hidden`, which pushes the +global header and range controls above the visible viewport without a visible +page scrollbar. + +## Triage Result + +`SPY-QA-02` does not reproduce in the current tree. The current evidence points +to a stale open checkbox for a symptom that was removed by the completed +`SPY-QA-01` layout fix. + +No product-code fix should be made from the old QA note alone. After review, +`SPY-QA-02` was closed in `PLAN.md` as no-repro in the current implementation, +while keeping the existing Playwright guard that verifies this failure mode. + +## Reproduction Attempt + +I used the production-built spy UI and the fixture-backed spy UI test server: + +- `bun run build:spy-ui` +- `bun run src/spy/ui/test-server.ts --port 4682 --static dist/spy-ui` +- Opened `http://127.0.0.1:4682/?since=0` in headless Chromium. 
+- Used the `1159 x 862` viewport mentioned in the manual QA notes. +- Selected the first provider call. +- Jumped to lower inspector sections through the section navigator: + `Network`, `Stream`, `Raw`, and `Health`. +- Loaded stream events when the stream panel exposed the load control. + +## Proof + +Runtime DOM metrics after the attempted reproduction: + +```json +{ + "viewport": { + "width": 1159, + "height": 862 + }, + "document": { + "documentElementScrollTop": 0, + "bodyScrollTop": 0 + }, + "main": { + "scrollTop": 0, + "scrollHeight": 862, + "clientHeight": 862, + "rect": { + "top": 0, + "bottom": 862, + "height": 862 + }, + "overflowY": "hidden" + }, + "header": { + "rect": { + "top": 0, + "bottom": 64, + "height": 64 + } + }, + "aside": { + "scrollTop": 2002, + "scrollHeight": 2800, + "clientHeight": 798, + "rect": { + "top": 64, + "bottom": 862, + "height": 798 + }, + "overflowY": "auto" + }, + "health": { + "top": 523, + "bottom": 842, + "height": 319 + }, + "openSections": [ + "inspector-section-network", + "inspector-section-stream", + "inspector-section-raw", + "inspector-section-health" + ] +} +``` + +Key observations: + +- `main.scrollTop` stayed `0`. +- `main.scrollHeight` equals `main.clientHeight` at `862`, so the top-level + container has no hidden scroll range in this layout. +- `documentElement.scrollTop` and `body.scrollTop` also stayed `0`. +- The global header remained visible at y=`0..64`. +- The scroll movement happened in `aside`, which is the intended inspector + scroll owner. +- The lower Health section remained visible inside the viewport at y=`523..842`. 
+ +The existing Playwright regression for this same class of bug also passes: + +```text +$ bunx playwright test -c src/spy/ui/playwright.config.ts -g "jumps to buried inspector sections from the section navigator" +Running 1 test using 1 worker + ✓ 1 src/spy/ui/e2e/spy-ui.playwright.ts:251:1 › jumps to buried inspector sections from the section navigator (4.8s) + + 1 passed (5.4s) +``` + +## Source Evidence + +Current layout code constrains the page to two viewport rows and makes the body +grid shrinkable: + +- `src/spy/ui/src/App.tsx:493` sets `main` to + `grid h-screen min-h-0 grid-rows-[4rem_minmax(0,1fr)] overflow-hidden`. +- `src/spy/ui/src/App.tsx:526` sets the body grid to `min-h-0 overflow-hidden`. +- `src/spy/ui/src/App.tsx:527` makes the timeline column `min-h-0`. + +The current section navigation still calls `scrollIntoView`, which is the path +that would reveal `SPY-QA-02` if a top-level scroll range still existed: + +- `src/spy/ui/src/App.tsx:1018-1035` renders the inspector section navigator. +- `src/spy/ui/src/App.tsx:1050-1055` opens details sections and calls + `target?.scrollIntoView({ block: "start" })`. + +The existing e2e guard asserts that this navigation path scrolls the inspector, +not `main`: + +- `src/spy/ui/e2e/spy-ui.playwright.ts:251-311` +- Initial assertion: `mainScrollTop` is `0`. +- After jumping to Health: `asideScrollTop` is greater than `0`, + `mainScrollTop` remains `0`, and the header remains at the top of the + viewport. + +## Root Cause + +The original `SPY-QA-02` symptom depended on `main` having hidden overflow with +a real internal scroll range. In that older layout, browser focus movement or +`scrollIntoView` could satisfy the requested scroll by moving the hidden +top-level container, which made the global header disappear even though no page +scrollbar was available. + +The current layout no longer gives `main` a scroll range. 
The measured +`main.scrollHeight === main.clientHeight`, and lower-section navigation moves +only the inspector `aside`. That matches the completed `SPY-QA-01` RCA: the +actual shared cause was the old page shell allowing content-driven heights to +exceed the visible viewport while `main` clipped the overflow. + +## Plan Status + +Do not change product code for `SPY-QA-02` unless a new reproduction is found. + +`PLAN.md` now closes `SPY-QA-02` as stale/no-repro in the current tree, with +this RCA as the proof. The next actual implementation bug to diagnose should be +the highest-priority remaining reproducible item after that. diff --git a/docs/bugfix/SPY-QA-03-RCA.md b/docs/bugfix/SPY-QA-03-RCA.md new file mode 100644 index 0000000..87d5d4f --- /dev/null +++ b/docs/bugfix/SPY-QA-03-RCA.md @@ -0,0 +1,157 @@ +# SPY-QA-03 RCA: Timeline Row And Footer Overlap + +## Scope + +This RCA covers the highest-priority spy bug that I could prove in the current +tree: `SPY-QA-03`. + +Triage note: `SPY-QA-02` is listed before this in `PLAN.md`, but I could not +reproduce it with the current code. I tested lower inspector panel focus, stream +event loading, narrower desktop viewports, shorter viewports, and keyboard focus +traversal. In all probes, `main.scrollTop` stayed `0` and the global header +stayed at y=`0`. Since the request requires evidence before fixing, I did not +infer a `SPY-QA-02` fix from the old QA note. + +## Reproduction Used + +- Built the current UI with `bun run build:spy-ui`. +- Started the fixture-backed spy UI service: + `bun src/spy/ui/test-server.ts --port 0 --static dist/spy-ui` +- The server selected `http://127.0.0.1:35678`. +- Opened `http://127.0.0.1:35678/?since=0`. +- Selected the `10 min` range, matching the QA note for the short 10-minute + view. +- Measured browser geometry with headless Chromium against the built UI. + +The fixture data was enough to reproduce the bug with five provider-call rows. 
+ +## Proof + +At a normal desktop viewport of 1100 x 720, rendered timeline rows overlap: + +```json +{ + "viewport": { "width": 1100, "height": 720 }, + "rowRects": [ + { "index": 0, "top": 273, "bottom": 395, "height": 122 }, + { "index": 1, "top": 391, "bottom": 513, "height": 122 }, + { "index": 2, "top": 509, "bottom": 631, "height": 122 }, + { "index": 3, "top": 627, "bottom": 749, "height": 122 }, + { "index": 4, "top": 745, "bottom": 867, "height": 122 } + ], + "overlaps": [ + { "previous": 0, "current": 1, "gap": -4 }, + { "previous": 1, "current": 2, "gap": -4 }, + { "previous": 2, "current": 3, "gap": -4 }, + { "previous": 3, "current": 4, "gap": -4 } + ] +} +``` + +The overlap is not just visual. At y=`393`, both row 0 and row 1 are in the hit +stack: + +```json +{ + "overlapY": 393, + "row0": { "top": 273, "bottom": 395 }, + "row1": { "top": 391, "bottom": 513 }, + "elementsAtPoint": [ + { "tag": "BUTTON", "testid": "timeline-row", "row": 1 }, + { "tag": "BUTTON", "testid": "timeline-row", "row": 0 } + ] +} +``` + +The sticky footer also covers row content. At initial scroll position in the same +viewport, the footer covers row 3 by 57 px: + +```json +{ + "footer": { "top": 663, "bottom": 720, "height": 57 }, + "covered": [ + { "index": 3, "overlap": 57, "rowBottom": 749, "footerTop": 663 } + ] +} +``` + +Even at the bottom of the timeline scroll range, the footer still covers the +last row: + +```json +{ + "scrollTop": 192, + "max": 192, + "lastRow": { "index": 4, "top": 553, "bottom": 675, "height": 122 }, + "footer": { "top": 663, "bottom": 720, "height": 57 }, + "covered": [ + { "index": 4, "overlap": 12, "rowBottom": 675, "footerTop": 663 } + ] +} +``` + +## Source Evidence + +Relevant current code: + +- `src/spy/ui/src/App.tsx:624-629` configures the virtualizer with + `estimateSize: () => 118`. +- `src/spy/ui/src/App.tsx:649-653` positions each rendered row wrapper at + `virtualRow.start`. 
+- `src/spy/ui/src/App.tsx:654-660` renders `TimelineRow` without attaching + `virtualizer.measureElement`, so the virtualizer never learns the actual row + height. +- `src/spy/ui/src/App.tsx:665-672` renders the call-count/Load More footer as + `sticky bottom-0` inside the same scroll container, but the virtualized list + does not reserve bottom clearance for that sticky footer. + +## Root Cause + +The timeline virtualizer assumes every row is 118 px tall, but the actual row +height at common desktop widths is 122 px. Because each row is absolutely +positioned from the virtualizer's fixed starts, row starts are 118 px apart while +the row boxes are 122 px tall. That creates a 4 px overlap between adjacent rows +in the current fixture; live rows with more wrapping can overlap more. + +The footer has a separate cause in the same component. It is sticky inside the +timeline scroll container, so it floats over list content. The virtualized list +height is based only on virtual rows and does not include bottom padding equal to +the sticky footer height. Therefore the final visible rows can scroll underneath +the footer and remain partly covered even at maximum scroll. + +## Proposed Fix + +Fix both parts in `Timeline`: + +- Give the virtualizer a conservative row estimate at or above the actual compact + row height, and attach `virtualizer.measureElement` to each row wrapper so + wrapped rows update the virtual layout with their real height. +- Include vertical spacing in the measured row wrapper instead of relying on a + too-small fixed estimate. +- Reserve bottom clearance for the sticky footer, either by adding bottom + padding/spacer to the virtualized content or by moving the footer outside the + scroll-overlay path. +- Add Playwright coverage that fails when adjacent row rects overlap or when the + footer covers the last row at max timeline scroll. + +## Fix Status + +Implemented. 
+ +Changed `src/spy/ui/src/App.tsx` so timeline virtual rows are measured with +`virtualizer.measureElement` and use a conservative initial row estimate. The +timeline footer is now a sibling of the scroll viewport instead of a sticky +overlay inside it, so row content cannot scroll underneath the call-count/Load +More controls. + +Added Playwright coverage in `src/spy/ui/e2e/spy-ui.playwright.ts` that fails +when adjacent timeline rows overlap or when the footer overlaps row content at +maximum timeline scroll. + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run build:spy-ui` +- `bun run test:spy-ui:unit` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-04-RCA.md b/docs/bugfix/SPY-QA-04-RCA.md new file mode 100644 index 0000000..327a972 --- /dev/null +++ b/docs/bugfix/SPY-QA-04-RCA.md @@ -0,0 +1,223 @@ +# SPY-QA-04 RCA: Long Inspectors Bury Core Panels + +## Scope + +This RCA covers the highest-priority spy bug that is both open and reproducible +in the current tree: `SPY-QA-04`. + +`PLAN.md` lines 852-855 describe the failure as long inspectors being hard to +navigate because Request/Response Blocks open by default and Usage Records, +Network Metadata, Stream Events, Raw Payloads, and Health become effectively +buried. + +Triage note: `SPY-QA-02` is listed first among the remaining P0 items, but the +plan already records a no-repro attempt. I repeated the relevant check in the +current built UI. Clicking the offscreen `Health` section scrolled the inspector +only: `main.scrollTop` stayed `0`, the global header stayed at y=`0..64`, and +the Health summary became visible at y=`655..699`. Because the requested fix +requires actual evidence before implementation, I am not treating `SPY-QA-02` +as the next fix until it can be reproduced. + +## Reproduction Used + +- Built the current UI with `bun run build:spy-ui`. 
+- Started the fixture-backed spy UI service: + `bun src/spy/ui/test-server.ts --port 4683 --static dist/spy-ui` +- Opened `http://127.0.0.1:4683/?since=0` in the Codex in-app browser. +- Selected the first fixture-backed provider call. +- Measured DOM geometry in the default 1280 x 720 browser viewport. + +The fixture call is sufficient to reproduce the issue without relying on live +traffic or a special long production capture. + +## Proof + +Immediately after selecting the first provider call, the inspector scroll +container is correctly constrained to the viewport, but all inspector section +summaries are below the visible area: + +```json +{ + "viewport": { "width": 1280, "height": 720 }, + "main": { + "scrollTop": 0, + "clientHeight": 720, + "scrollHeight": 720, + "overflowY": "hidden" + }, + "aside": { + "scrollTop": 0, + "clientHeight": 656, + "scrollHeight": 3448, + "maxScrollTop": 2792, + "overflowY": "auto", + "rect": { "top": 64, "bottom": 720, "height": 656 } + }, + "detailsOpenByDefault": ["Request Blocks", "Response Blocks"], + "visibleSummaries": [], + "screenfulsToBottom": 4.26 +} +``` + +The measured section positions at the top of the inspector were: + +```json +[ + { "title": "Request Blocks", "top": 967, "open": true, "height": 1653 }, + { "title": "Response Blocks", "top": 2636, "open": true, "height": 485 }, + { "title": "Diff Against Previous Request", "top": 3137, "open": false }, + { "title": "Usage Records", "top": 3199, "open": false }, + { "title": "Network Metadata", "top": 3261, "open": false }, + { "title": "Stream Events", "top": 3323, "open": false }, + { "title": "Raw Payloads", "top": 3385, "open": false }, + { "title": "Health", "top": 3447, "open": false } +] +``` + +The practical effect is that none of the call-native inspector sections are +visible in the first viewport. 
The operator sees only the top summary and +request-composition card, then must scroll several viewport heights before +reaching provider usage, network metadata, stream events, raw payload state, or +service health. + +After opening and loading Stream Events, the scroll burden grows further: + +```json +{ + "aside": { + "clientHeight": 656, + "scrollHeight": 4926, + "maxScrollTop": 4270 + }, + "detailsOpenByDefaultOrUserOpened": [ + "Request Blocks", + "Response Blocks", + "Stream Events", + "Health" + ], + "preBlockCount": 18, + "screenfulsToBottom": 6.51 +} +``` + +That stream-event expansion is the related `SPY-QA-05` problem, but it makes the +same root navigation problem worse: all detail panels live in one long scroll +stack with no section navigation and several high-volume panels rendered inline. + +## Source Evidence + +Relevant pre-fix code observed before implementation: + +- `src/spy/ui/src/App.tsx:768` makes the whole inspector one vertical + `overflow-auto` scroll container. +- `src/spy/ui/src/App.tsx:783` renders all inspector content in a single + `space-y-4` stack. +- `src/spy/ui/src/App.tsx:812-843` renders Summary, Request Composition, then + every inspector section sequentially. +- `src/spy/ui/src/App.tsx:814` opens Request Blocks by default. +- `src/spy/ui/src/App.tsx:818` opens Response Blocks by default. +- `src/spy/ui/src/App.tsx:1013-1016` renders every normalized block in each open + block section. +- `src/spy/ui/src/App.tsx:1036-1038` allows each block preview to consume up to + `max-h-64`, so a modest number of blocks can consume multiple viewport + heights. +- `src/spy/ui/src/App.tsx:1147-1157` renders loaded stream events inline. +- `src/spy/ui/src/App.tsx:1225-1234` defines `Section` as a plain `<details>
` + panel with no sticky local navigation, no summary list, and no bounded + section-level pagination. + +## Root Cause + +The inspector layout has a correct scroll owner after `SPY-QA-01`, but the +content model inside that owner is still linear and volume-driven. + +`InspectorContent` renders high-level summary cards first, then opens both +Request Blocks and Response Blocks by default before the diagnostic sections. +Those block lists render every block preview inline. The first fixture call +therefore places the first section summary at y=`967`, below the viewport, and +places Usage Records, Network Metadata, Stream Events, Raw Payloads, and Health +more than 3,100 px below the viewport top. + +The issue is not that lower panels are mathematically unreachable. They are +reachable through the inspector scrollbar. The bug is that the call-native +inspector makes core diagnostic panels operationally hard to reach during normal +use, especially when request/response blocks or loaded stream events are large. + +## Proposed Fix + +Make the inspector navigable without requiring a long linear scroll through +expanded content: + +- Add a compact sticky section navigator inside the inspector header or directly + below it, with anchors for Summary, Request Composition, Request Blocks, + Response Blocks, Diff, Usage, Network, Stream, Raw, and Health. +- Collapse high-volume Request Blocks and Response Blocks by default for large + calls, or render them behind explicit "expand blocks" controls when their + measured block count/byte size exceeds a conservative threshold. +- Preserve quick visibility for Usage Records, Network Metadata, Stream Events, + Raw Payloads, and Health from the top of the inspector through the navigator. +- Keep Stream Events loaded on demand, and coordinate with `SPY-QA-05` for + pagination or virtualization so loading stream events does not create another + multi-screen inline stack. 
+- Add Playwright coverage that selects the fixture call and verifies that the + lower diagnostic sections are reachable via the new navigation without + manually scrolling thousands of pixels. + +## Fix Status + +Implemented. + +Changed `src/spy/ui/src/App.tsx` so the inspector header includes a sticky +section navigator for Summary, Composition, Request, Response, Diff, Usage, +Network, Stream, Raw, and Health. Navigator buttons open `<details>
` sections +when needed and scroll the target section into view inside the inspector. + +Changed large Request/Response block sections to start collapsed when the +combined block count or byte size exceeds the conservative auto-open threshold. +For the original fixture call, Request Blocks and Response Blocks now start +collapsed and show block-count/byte-size summaries in their headers. + +Added Playwright coverage in `src/spy/ui/e2e/spy-ui.playwright.ts` proving that: + +- Large fixture block sections start collapsed. +- Health initially remains below the viewport, preserving the regression setup. +- The Health navigator button scrolls Health into the viewport and opens it. +- `main.scrollTop` remains `0`, so the fix does not reintroduce hidden + top-level scrolling. + +Post-fix browser verification at 1280 x 720: + +```json +{ + "beforeNav": { + "main": { "scrollTop": 0, "clientHeight": 720, "scrollHeight": 720 }, + "aside": { + "scrollTop": 0, + "clientHeight": 656, + "scrollHeight": 1457, + "maxScrollTop": 801 + }, + "requestOpen": false, + "responseOpen": false, + "health": { "top": 1455, "bottom": 1501 } + }, + "afterHealthNav": { + "main": { "scrollTop": 0, "clientHeight": 720, "scrollHeight": 720 }, + "aside": { + "scrollTop": 1074, + "clientHeight": 656, + "scrollHeight": 1730, + "maxScrollTop": 1074 + }, + "healthOpen": true, + "health": { "top": 381, "bottom": 700 } + } +} +``` + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy-ui:unit` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-05-RCA.md b/docs/bugfix/SPY-QA-05-RCA.md new file mode 100644 index 0000000..97b44c6 --- /dev/null +++ b/docs/bugfix/SPY-QA-05-RCA.md @@ -0,0 +1,298 @@ +# SPY-QA-05 RCA: Stream Events Render Inline And Persist Across Range Changes + +## Scope + +This RCA covers the highest-priority spy bug that is open and reproducible in +the current tree: `SPY-QA-05`. 
+ +Triage note: `SPY-QA-02` is still the only open P0 in `PLAN.md`, but I could +not reproduce it in the current built UI. The earlier `SPY-QA-01` and +`SPY-QA-04` fixes appear to keep `main` constrained. Because this fix request +requires actual evidence before implementation, I am not proposing a +`SPY-QA-02` code change from the old QA note alone. + +## Reproduction Used + +Baseline build: + +- Ran `bun run build:spy-ui`. +- Started the existing fixture-backed spy UI service at + `http://127.0.0.1:26737`. +- Opened `/?since=0` in the Codex in-app browser. +- Clicked the inspector `Health` and `Stream` section navigator buttons, then + loaded stream events for the selected fixture call. + +High-volume stream reproduction: + +- Started an API-compatible synthetic local server at + `http://127.0.0.1:59006` serving the built `dist/spy-ui` assets. +- The server returned one complete Bedrock call with `streamEventCount=250`. +- The `/api/calls/:id/stream-events` endpoint respected the UI's current + `limit=100` request and returned `nextCursor` until all 250 events were + loaded. +- Opened `http://127.0.0.1:59006/?since=0`, jumped to `Stream Events`, clicked + `Load Stream Events`, clicked `Load More Stream Events` twice, then changed + the time range to `Today`. + +The high-volume server used API-shaped data only; no application code was +changed to produce this proof. + +## P0 Triage Proof + +The current UI did not reproduce the hidden top-level scroll failure called out +in `SPY-QA-02`. After opening lower inspector sections and loading stream +events in the fixture-backed UI, `main.scrollTop` remained `0`, the header +remained at the top of the viewport, and `main.scrollHeight` equaled +`main.clientHeight`. 
+ +Representative browser measurements after opening `Health`, opening `Stream`, +and loading the fixture stream events: + +```json +{ + "main": { + "clientHeight": 720, + "scrollHeight": 720, + "scrollTop": 0, + "overflowY": "hidden" + }, + "header": { "top": 0, "bottom": 64, "height": 64 }, + "aside": { + "clientHeight": 656, + "scrollHeight": 2935, + "scrollTop": 1030, + "maxScrollTop": 2279 + }, + "stream": { + "open": true, + "height": 1251 + }, + "streamPreCount": 6 +} +``` + +That is a no-repro for `SPY-QA-02`, not a fix. It only explains why this RCA +moves to the next highest-priority open spy bug with current evidence. + +## Proof + +Initial high-volume stream state before loading events: + +```json +{ + "stream": { + "open": false, + "height": 46, + "elementCount": 6 + }, + "streamPreCount": 0, + "aside": { + "clientHeight": 656, + "scrollHeight": 1969, + "maxScrollTop": 1313 + }, + "health": { + "top": 1967, + "bottom": 2013 + } +} +``` + +After clicking `Load Stream Events`, the UI rendered the first API page of 100 +events inline: + +```json +{ + "streamPreCount": 100, + "streamPreScrollHeightSum": 24800, + "stream": { + "open": true, + "height": 27511, + "elementCount": 505 + }, + "aside": { + "clientHeight": 656, + "scrollHeight": 29434, + "maxScrollTop": 28778 + }, + "health": { + "top": 28028, + "bottom": 28074 + }, + "streamButtons": [ + { "text": "Load More Stream Events", "disabled": false } + ] +} +``` + +After clicking `Load More Stream Events` once, the UI appended another 100 +events and kept all previous event DOM nodes mounted: + +```json +{ + "streamPreCount": 200, + "streamPreScrollHeightSum": 49600, + "stream": { + "height": 54911, + "elementCount": 1005 + }, + "aside": { + "scrollHeight": 56834, + "maxScrollTop": 56178 + } +} +``` + +After clicking `Load More Stream Events` a second time, all 250 events were +mounted inline: + +```json +{ + "streamPreCount": 250, + "streamPreScrollHeightSum": 62000, + "stream": { + "height": 68611, + 
"elementCount": 1255 + }, + "aside": { + "clientHeight": 656, + "scrollHeight": 70534, + "scrollTop": 56178, + "maxScrollTop": 69878 + }, + "health": { + "top": 14354, + "bottom": 14400 + }, + "streamButtons": [ + { "text": "Load More Stream Events", "disabled": true } + ] +} +``` + +After changing the range to `Today`, the loaded stream state persisted: + +```json +{ + "streamPreCount": 250, + "streamPreScrollHeightSum": 62000, + "stream": { + "open": true, + "height": 68611, + "elementCount": 1255 + }, + "aside": { + "scrollHeight": 70534, + "scrollTop": 56178, + "maxScrollTop": 69878 + } +} +``` + +This reproduces the core `SPY-QA-05` behavior in a deterministic way: + +- The UI loads stream events in pages, but every loaded event remains rendered + inline. +- The stream section grows from `46 px` closed to `68,611 px` with 250 events. +- The inspector scroll height grows to `70,534 px`, while the visible inspector + viewport is only `656 px`. +- The `Health` panel is pushed thousands of pixels away after stream expansion. +- The loaded stream state and deep inspector `scrollTop` survive a range change. + +## Source Evidence + +Relevant current code: + +- `src/spy/ui/src/api.ts:25-26` defines `DEFAULT_STREAM_LIMIT = 100`. +- `src/spy/ui/src/api.ts:70-75` always requests stream events with + `limit=100`, adding only a cursor when loading more. +- `src/spy/store.ts:530-545` supports cursor pagination and returns + `nextCursor`, so the backend already exposes a bounded stream-event page. +- `src/spy/ui/src/App.tsx:362-380` stores loaded stream events in one + `streamState.items` array and appends more pages with + `[...(current?.items ?? []), ...page.items]`. +- `src/spy/ui/src/App.tsx:1245-1262` renders + `props.streamState.items.map(...)` directly, producing one event card and one + `
<pre>` payload block per loaded stream event.
+- `src/spy/ui/src/App.tsx:1254-1255` formats and renders each full event payload
+  preview inline, clipped to 4,000 characters but still mounted as DOM text.
+- `src/spy/ui/src/App.tsx:144-174` reloads calls when time range/filter/search
+  state changes and intentionally preserves the selected call when it is still
+  present in the new result set.
+- `src/spy/ui/src/App.tsx:291-319` resets `streamState` only when the selected
+  call detail version changes. A range change that keeps the same selected call
+  does not clear stream events or reset inspector scroll.
+- `src/spy/ui/src/App.tsx:794-814` makes the entire inspector a single
+  `overflow-auto` scroll container, so the high-volume stream panel expands the
+  same scroll stack that contains `Raw Payloads` and `Health`.
+
+## Root Cause
+
+`SPY-QA-05` is a browser-side rendering/state problem, not a backend pagination
+problem.
+
+The backend stream-event endpoint is paginated. The UI requests 100 events per
+page and receives `nextCursor` correctly, but `loadStreamEvents(true)` appends
+each page into a single `streamState.items` array. `StreamPanel` then maps every
+loaded item into a fully mounted card with a JSON `<pre>`.
+
+Because stream events are rendered inside the same linear inspector scroll
+container as the rest of the call detail, high-volume calls turn the inspector
+into a tens-of-thousands-of-pixels document. Lower panels such as `Raw Payloads`
+and `Health` remain technically in the DOM but become operationally buried.
+
+The stale-state part has a separate but related cause: time range changes reload
+the timeline but preserve the selected call if that call is still in the new
+result set. Since `streamState` is reset only when the selected call detail
+version changes, loaded stream events and the inspector's deep scroll position
+survive the range change.
+
+## Proposed Fix
+
+Fix `SPY-QA-05` in the inspector stream-event path:
+
+- Keep the backend cursor pagination as-is.
+- Render the stream-event list with virtualization or a bounded page window
+  instead of mapping every loaded event into mounted DOM nodes.
+- Avoid nested scroll traps inside every event payload. Keep each event payload
+  collapsed by default or render a short summary with an explicit expand action.
+- Reset stream state when the active time range, search, metadata filters, or
+  selected call changes. If preserving a selected call across range changes is
+  desired, still clear loaded stream events because the operator changed the
+  timeline context.
+- Reset the inspector scroll owner to the top when the selected call changes or
+  when stream state is cleared by a range/filter/search change.
+- Add Playwright coverage with a synthetic high-volume stream response proving
+  that loading 250 events does not mount 250 event payload blocks and that a
+  range change clears the loaded stream panel.
+
+## Fix Status
+
+Implemented.
+
+Changed `src/spy/ui/src/App.tsx` so loaded stream events render through a
+bounded 25-event window instead of mounting every loaded event. Event payloads
+start collapsed, and expanding one payload mounts only that payload preview.
+
+Changed stream state handling so selecting a different call or changing the
+timeline context clears loaded stream events and resets the inspector scroll
+owner to the top.
+
+Added Playwright coverage in `src/spy/ui/e2e/spy-ui.playwright.ts` with a
+synthetic 250-event Bedrock stream response proving that:
+
+- Loading all 250 events leaves only 25 event cards mounted.
+- Payload `<pre>` blocks are not mounted while collapsed.
+- The inspector and stream section stay below the previous runaway heights.
+- Changing the range to `Today` clears loaded stream events and resets
+  inspector scroll.
+
+Verification commands:
+
+- `bun run typecheck`
+- `bun run lint`
+- `bun run build:spy-ui`
+- `bun run test:spy-ui:unit`
+- `bun run test:spy-ui:e2e`
+- `bun run test`
+- `git diff --check`
diff --git a/docs/bugfix/SPY-QA-06-RCA.md b/docs/bugfix/SPY-QA-06-RCA.md
new file mode 100644
index 0000000..28c4d2d
--- /dev/null
+++ b/docs/bugfix/SPY-QA-06-RCA.md
@@ -0,0 +1,166 @@
+# SPY-QA-06 RCA: No Reset Path For Already-Selected Calls
+
+## Scope
+
+This RCA covers the highest-priority spy bug that is both open and reproducible
+in the current tree: `SPY-QA-06`.
+
+`PLAN.md` lists `SPY-QA-02` first among open P0 items, but the plan already
+records a no-repro attempt. I repeated the check before this RCA across 1280 x
+720, 1100 x 720, 1280 x 600, 980 x 600, and 1440 x 900 viewports. Focusing,
+clicking, tabbing to, and navigating to lower inspector sections all kept
+`main.scrollTop`, `documentElement.scrollTop`, `body.scrollTop`, and
+`window.scrollY` at `0`. In the current tree, `main.scrollHeight` also equals
+`main.clientHeight`, so there is no hidden top-level scroll range for the
+reported P0 behavior to enter. I am not proposing a P0 source change without a
+reproduction.
+
+## Reproduction Used
+
+- Built the current UI with `bun run build:spy-ui`.
+- Started the fixture-backed spy UI service:
+  `bun run src/spy/ui/test-server.ts --port 4699 --static dist/spy-ui`
+- Opened `http://127.0.0.1:4699/?since=0` in Playwright Chromium.
+- Used a 1280 x 720 viewport.
+- Selected the latest fixture call.
+- Jumped to Stream Events, loaded the stream events, and scrolled the inspector
+  to the bottom.
+- Clicked the already-selected timeline row again.
+- Clicked a different timeline row as a control case.
+
+## Proof
+
+Runtime measurements from the browser:
+
+```json
+[
+  {
+    "label": "selected latest call",
+    "selectedCallId": "call-fixture-flow-tool-result",
+    "activeRowLabel": "Open call call-fixture-flow-tool-result",
+    "main": { "scrollTop": 0, "scrollHeight": 720, "clientHeight": 720 },
+    "aside": { "scrollTop": 0, "scrollHeight": 1442, "clientHeight": 656 },
+    "openSections": [],
+    "visibleStreamCards": 0
+  },
+  {
+    "label": "deep stream inspection",
+    "selectedCallId": "call-fixture-flow-tool-result",
+    "activeRowLabel": "Open call call-fixture-flow-tool-result",
+    "main": { "scrollTop": 0, "scrollHeight": 720, "clientHeight": 720 },
+    "aside": { "scrollTop": 1321, "scrollHeight": 1977, "clientHeight": 656 },
+    "openSections": ["inspector-section-stream"],
+    "stream": { "top": -5, "bottom": 576, "height": 581 },
+    "health": { "top": 654, "bottom": 700, "height": 46 },
+    "visibleStreamCards": 6
+  },
+  {
+    "label": "after clicking selected row again",
+    "selectedCallId": "call-fixture-flow-tool-result",
+    "activeRowLabel": "Open call call-fixture-flow-tool-result",
+    "main": { "scrollTop": 0, "scrollHeight": 720, "clientHeight": 720 },
+    "aside": { "scrollTop": 1321, "scrollHeight": 1977, "clientHeight": 656 },
+    "openSections": ["inspector-section-stream"],
+    "stream": { "top": -5, "bottom": 576, "height": 581 },
+    "health": { "top": 654, "bottom": 700, "height": 46 },
+    "visibleStreamCards": 6
+  },
+  {
+    "label": "after clicking different row control case",
+    "selectedCallId": "call-fixture-flow-tool-use",
+    "activeRowLabel": "Open call call-fixture-flow-tool-use",
+    "main": { "scrollTop": 0, "scrollHeight": 720, "clientHeight": 720 },
+    "aside": { "scrollTop": 0, "scrollHeight": 1442, "clientHeight": 656 },
+    "openSections": [],
+    "visibleStreamCards": 0
+  }
+]
+```
+
+Key observations:
+
+- Re-clicking the already-selected row leaves `aside.scrollTop` at `1321`.
+- The Stream Events accordion remains open.
+- Previously loaded stream-event cards remain visible.
+- The selected call id does not change, so the operator stays in the deep detail
+  context instead of returning to the call title/summary.
+- Clicking a different row resets the inspector correctly: `aside.scrollTop`
+  returns to `0`, open sections clear, and stream-event cards are removed.
+
+## Source Evidence
+
+Relevant current code:
+
+- `src/spy/ui/src/App.tsx:202-208` resets stream state and inspector scroll only
+  when `selectedCallId` changes.
+- `src/spy/ui/src/App.tsx:768-774` wires every timeline row click to
+  `props.onSelect(summary.call.id)`.
+- `src/spy/ui/src/App.tsx:551-572` passes inspector callbacks, but there is no
+  explicit inspector reset callback or reset affordance.
+- `src/spy/ui/src/App.tsx:1032-1038` opens lower inspector sections through the
+  section navigator.
+- `src/spy/ui/src/App.tsx:1040-1047` resets only `aside.scrollTop`; it is not
+  callable from the already-selected row path.
+- `src/spy/ui/src/App.tsx:1537-1542` renders native `<details>
` sections whose + open state can be changed by user interaction or section navigation. + +## Root Cause + +The UI currently treats inspector reset as a side effect of selection identity +change. That works for a genuinely different call, because `selectedCallId` +changes and the `useEffect` at `src/spy/ui/src/App.tsx:202-208` clears +`streamState` and scrolls the inspector to the top. + +Re-clicking the already-selected timeline row does not change React state: +`TimelineRow` calls `onSelect` with the same call id, React keeps +`selectedCallId` unchanged, and the reset effect does not run. The selected call +therefore keeps the previous inspector scroll position, loaded stream-event +state, and open lower section. + +This matches the operator problem in `PLAN.md`: once the inspector is deep in +Stream Events or another lower panel, there is no obvious way to return the +selected call to a clean top-of-inspector state unless the operator selects a +different call or changes the timeline context. + +## Proposed Fix + +Add an explicit selected-call reset path instead of relying only on selection +identity changes: + +- Introduce a `resetSelectedCallInspection()` helper in `App` that clears + `streamState`, resets the inspector scroll, and increments an inspector reset + token. +- Pass that helper to `Timeline` and call it when the clicked row id already + equals `selectedCallId`; keep the existing `setSelectedCallId` path for + different calls. +- Use the reset token as a `key` on the loaded inspector content, or pass it to + the section stack, so native `
<details>` open state is remounted and lower + accordions close on reset. +- Add Playwright coverage for the exact reproduction above: load stream events, + scroll deep, re-click the selected row, and assert `aside.scrollTop === 0`, + no stream cards are rendered, and no lower section remains open. + +## Fix Status + +Implemented. + +Changed `src/spy/ui/src/App.tsx` so clicking the already-selected timeline row +now clears loaded stream state, increments an inspector reset key, and scrolls +the inspector back to the top. The reset key remounts the loaded inspector +content, which closes native `<details>
` sections such as Stream Events. + +Added Playwright coverage in `src/spy/ui/e2e/spy-ui.playwright.ts` for the +reproduction path. The test loads stream events, scrolls the inspector deep, +clicks the selected row again, then verifies: + +- `aside.scrollTop` returns to `0`. +- Stream event cards are removed. +- `main.scrollTop` remains `0`. +- The Stream Events section is closed. + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy-ui:unit` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-07-RCA.md b/docs/bugfix/SPY-QA-07-RCA.md new file mode 100644 index 0000000..7401e64 --- /dev/null +++ b/docs/bugfix/SPY-QA-07-RCA.md @@ -0,0 +1,189 @@ +# SPY-QA-07 RCA: Selected Call Pinning Is Implicit During Live Updates + +## Scope + +This RCA covers the highest-priority spy bug I could reproduce in the current +tree after reading `PLAN.md`: `SPY-QA-07`. + +Triage notes: + +- `SPY-QA-02` remains the only open P0 in `PLAN.md`, but it did not reproduce + in the current built UI. Opening lower inspector panels, loading stream + events, and jumping back to Health kept `main.scrollTop=0` and the global + header at `top=0`. +- `SPY-QA-06` also did not reproduce in the current built UI. Selecting a + different call after deep inspector scrolling reset the inspector to + `scrollTop=0`. +- The next open P1 with current evidence is `SPY-QA-07`. + +No implementation code has been changed for this RCA. + +## Reproduction Used + +Baseline: + +- Ran `bun run build:spy-ui`. +- Started the fixture-backed spy UI service with: + `bun src/spy/ui/test-server.ts --port 0 --static dist/spy-ui` +- Opened the built UI in the Codex in-app browser at + `http://127.0.0.1:35394/?since=0`. +- Used the production built assets from `dist/spy-ui`. + +The fixture server returns multiple Bedrock calls ordered newest first. This is +enough to prove the ambiguous UI state that happens after a live update inserts +a newer row above the currently selected call. 
+ +## Higher-Priority No-Repro Proof + +`SPY-QA-02` was checked at the default `1280 x 720` viewport and at the original +manual-QA style `1159 x 862` viewport. + +At `1159 x 862`, after opening Health, opening Stream, loading stream events, +and jumping back to Health: + +```json +{ + "main": { + "clientHeight": 862, + "scrollHeight": 862, + "scrollTop": 0, + "overflowY": "hidden" + }, + "header": { "top": 0, "bottom": 64, "height": 64 }, + "aside": { + "clientHeight": 798, + "scrollHeight": 2321, + "scrollTop": 1523, + "overflowY": "auto" + }, + "openDetails": [ + { "id": "spy-inspector-stream", "top": -192, "height": 637 }, + { "id": "spy-inspector-health", "top": 523, "height": 319 } + ] +} +``` + +This proves the lower-panel navigation work is currently owned by the inspector +scroll container, not the hidden top-level `main` scroll container. + +For `SPY-QA-06`, after selecting another timeline call from the deep Health +position: + +```json +{ + "main": { + "clientHeight": 862, + "scrollHeight": 862, + "scrollTop": 0 + }, + "header": { "top": 0, "bottom": 64, "height": 64 }, + "aside": { + "clientHeight": 798, + "scrollHeight": 1457, + "scrollTop": 0 + }, + "openDetails": [] +} +``` + +That is a no-repro for the current tree, so this RCA does not propose a +`SPY-QA-02` or `SPY-QA-06` code change. + +## Proof + +With five calls visible, I selected the second row, +`call-fixture-flow-tool-use`, while the newer +`call-fixture-flow-tool-result` row remained above it. + +Browser measurement: + +```json +{ + "label": "selected-older-call-with-newer-row-above", + "rowCount": 5, + "firstRow": { + "ariaLabel": "Open call call-fixture-flow-tool-result", + "selectedVisual": false, + "text": "claude-sonnet-4-6 complete cache 2 07:09:19 PM input 2.6 KiB output 217 B usage 1.3k tok duration 1.0 s converse-stream..." 
+ }, + "selectedRows": [ + { + "ariaLabel": "Open call call-fixture-flow-tool-use", + "selectedVisual": true, + "text": "claude-sonnet-4-6 complete cache 2 07:09:17 PM input 2.4 KiB output 479 B usage 1.2k tok duration 1.0 s converse-stream..." + } + ], + "inspectorId": "call-fixture-flow-tool-use", + "visiblePinnedText": false, + "headerSubtitle": "Live from now" +} +``` + +This proves the core ambiguous state: + +- The timeline can contain a newer call above the selected call. +- The inspector remains on the older selected call. +- The UI has no visible "pinned", "following", or "auto-follow" state. +- The only selected-call cue is the timeline row border/ring, which may be + offscreen in longer live timelines. + +## Source Evidence + +Relevant current code: + +- `src/spy/ui/src/App.tsx:254-258` handles `calls-changed` SSE events by + calling `loadCalls()`. +- `src/spy/ui/src/App.tsx:175-183` preserves the current `selectedCallId` when a + reloaded page still contains that call. +- `src/spy/ui/src/App.tsx:772-788` renders selected timeline state only as + visual row styling plus `aria-label="Open call "`. +- `src/spy/ui/src/App.tsx:862-876` renders the inspector header with model, + call id, status, and section navigation, but no pinned/following status. + +## Root Cause + +`SPY-QA-07` is a browser state-communication bug. + +The live-update behavior intentionally preserves `selectedCallId` when +`loadCalls()` receives a refreshed timeline that still contains the selected +call. That is a reasonable state model for an inspector: operators often need +the detail pane to stay on the call they are reading while new live rows arrive. + +The bug is that the UI does not communicate that state. Once a newer call is +inserted above the selected call, the inspector is effectively pinned to an +older call, but the inspector header still looks like ordinary selected-call +detail. 
There is no explicit pinned indicator, no "newer calls available" +signal, and no follow-latest control. + +## Proposed Fix + +Fix `SPY-QA-07` in the browser selection/live-update path: + +- Track whether the selected call is the newest visible call. +- When it is not, show a compact pinned-state badge in the inspector header, + such as `Pinned`, with a clear affordance to jump/follow the latest call. +- Keep the existing selected-call preservation behavior; changing that behavior + would interrupt active inspection during live capture. +- Add Playwright coverage for a live update that inserts a newer call above the + selected call and proves the inspector exposes the pinned state. + +## Fix Status + +Implemented. + +Changed `src/spy/ui/src/App.tsx` so the app derives whether the selected call +is older than the newest visible timeline row. When that happens, the inspector +sticky header now shows a `Pinned` badge and a `Follow Latest` button that +selects the newest visible call while preserving the existing pinned-inspection +behavior by default. + +Added Playwright coverage in `src/spy/ui/e2e/spy-ui.playwright.ts` proving that +selecting an older visible call exposes the pinned state, and that `Follow +Latest` selects the newest call and clears the pinned indicator. + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy-ui:unit` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-08-RCA.md b/docs/bugfix/SPY-QA-08-RCA.md new file mode 100644 index 0000000..6adc6ed --- /dev/null +++ b/docs/bugfix/SPY-QA-08-RCA.md @@ -0,0 +1,230 @@ +# SPY-QA-08 RCA: Fixed Since URLs Render As Live And Range State Does Not Update The URL + +## Scope + +This RCA covers the highest-priority open spy bug I could prove in the current +tree after reading `PLAN.md`: `SPY-QA-08`. + +Triage notes: + +- `SPY-QA-02` remains the only open P0 in `PLAN.md`, but it still does not + reproduce in the current built UI. 
Opening lower inspector panels, loading + stream events, and jumping back to Health kept `main.scrollTop=0`, kept the + global header at `top=0`, and kept `main.scrollHeight === main.clientHeight`. +- `SPY-QA-06` also did not reproduce in the current built UI. Selecting a + different call after deep inspector scrolling reset the inspector to + `scrollTop=0`. +- The next open P1 with concrete current evidence is `SPY-QA-08`. + +No implementation code has been changed for this RCA. + +## Reproduction Used + +- Built the current UI with `bun run build:spy-ui`. +- Started the fixture-backed spy UI service with: + `bun src/spy/ui/test-server.ts --port 4689 --static dist/spy-ui` +- Opened the built UI at `http://127.0.0.1:4689/?since=0`. +- Used browser DOM measurements against the production build in the default + 1280 x 720 viewport. + +The fixture server is enough to prove the state bug because `?since=0` is a +fixed historical URL, regardless of whether the loaded fixture calls are recent. + +## Higher-Priority No-Repro Proof + +After opening Health, opening Stream, loading stream events, and jumping back to +Health: + +```json +{ + "viewport": { "width": 1280, "height": 720 }, + "main": { + "clientHeight": 720, + "scrollHeight": 720, + "scrollTop": 0, + "overflowY": "hidden", + "rect": { "top": 0, "bottom": 720, "height": 720 } + }, + "header": { "top": 0, "bottom": 64, "height": 64 }, + "aside": { + "clientHeight": 656, + "scrollHeight": 2265, + "scrollTop": 1609, + "overflowY": "auto", + "rect": { "top": 64, "bottom": 720, "height": 656 } + }, + "stream": { + "open": true, + "cards": 6, + "rect": { "top": -278, "bottom": 303, "height": 581 } + }, + "health": { + "open": true, + "rect": { "top": 381, "bottom": 700, "height": 319 } + } +} +``` + +This is a no-repro for `SPY-QA-02`: the lower-panel navigation is owned by the +inspector scroll container, and the hidden top-level `main` container does not +scroll. 
+ +After selecting another timeline call from that deep inspector position: + +```json +{ + "main": { + "clientHeight": 720, + "scrollHeight": 720, + "scrollTop": 0, + "rect": { "top": 0, "bottom": 720, "height": 720 } + }, + "header": { "top": 0, "bottom": 64, "height": 64 }, + "aside": { + "clientHeight": 656, + "scrollHeight": 1457, + "scrollTop": 0, + "rect": { "top": 64, "bottom": 720, "height": 656 } + }, + "openDetails": [] +} +``` + +This is a no-repro for `SPY-QA-06`: selecting a different call resets the +inspector scroll and closes the opened detail sections in the current build. + +## Proof + +Initial load from a fixed historical URL: + +```json +{ + "href": "http://127.0.0.1:4689/?since=0", + "search": "?since=0", + "subtitle": "Live from now", + "activeRangeButtons": ["Live"], + "visibleRows": 5 +} +``` + +The UI is using `since=0` from the URL to load calls, but the visible range state +claims the page is `Live from now` and highlights `Live`. + +After clicking `10 min`: + +```json +{ + "href": "http://127.0.0.1:4689/?since=0", + "search": "?since=0", + "subtitle": "Since May 23, 07:17:53 PM", + "activeRangeButtons": ["10 min"], + "visibleRows": 5 +} +``` + +The internal range state changed, but the browser URL stayed `?since=0`. + +After reloading that same URL: + +```json +{ + "href": "http://127.0.0.1:4689/?since=0", + "search": "?since=0", + "subtitle": "Live from now", + "activeRangeButtons": ["Live"], + "visibleRows": 5 +} +``` + +This proves the round-trip failure: + +- A fixed `since` URL is not distinguished from true live mode on initial load. +- Changing the selected range does not update the URL. +- Reloading brings back the stale `since=0` query while the UI again labels the + view as `Live from now`. + +## Source Evidence + +Relevant current code: + +- `src/spy/ui/src/api.ts:28-35` parses `?since=` into a number and returns it + as the initial call query timestamp. 
+- `src/spy/ui/src/App.tsx:124-126` initializes `preset` independently as + `"live"` while initializing `since` and `customStart` from + `initialSinceFromLocation(window.location)`. +- `src/spy/ui/src/App.tsx:163-165` passes the parsed `since` value into the + call-list API query. +- `src/spy/ui/src/App.tsx:365-383` updates React state when a range changes, + but does not update `window.history` or the query string. +- `src/spy/ui/src/App.tsx:477-479` renders `Live from now` solely from + `preset === "live"`, even when `since` came from a fixed URL. +- `src/spy/ui/src/App.tsx:701-710` makes the active segment visual depend on + the same `preset` state, not on the URL-derived mode. + +## Root Cause + +`SPY-QA-08` is a browser state-model bug. + +The app stores two related pieces of timeline state separately: + +- `since`, which controls the API query and may come from `window.location`. +- `preset`, which controls the range label and active range segment. + +On startup, `since` is hydrated from the URL, but `preset` is always initialized +to `"live"`. That creates an impossible state: a fixed historical `since` value +with a live-mode label. + +Range changes have the inverse problem. `setPresetSince()` and +`applyCustomStart()` update React state only. They never push or replace the +browser URL, so the URL can continue to advertise an old `since` value after the +visible range has changed. Reloading then rehydrates from the stale URL and +recreates the wrong live/fixed state. + +## Proposed Fix + +Fix `SPY-QA-08` by making the range URL and range state a single coherent model: + +- Derive the initial preset from `window.location.search`. + - No `since` query means true live mode. + - A valid `since` query means a fixed/custom range unless it exactly matches a + known preset that the URL explicitly records. +- Store enough URL state to distinguish true live mode from a fixed `since` + timestamp. A `mode` or `preset` query parameter would make this explicit. 
+- Update the URL with `history.replaceState` or `history.pushState` whenever the + operator changes Live, 10 min, 1 hour, Today, or Custom. +- Keep `./rootcell spy` launch URLs that include a viewer launch timestamp from + being labeled `Live from now`; those are fixed since URLs unless the URL + explicitly says they are live. +- Add UI unit or Playwright coverage that loads `/?since=0`, verifies it is not + labeled live, changes the range, verifies the URL changes, reloads, and + verifies the same range state is restored. + +## Fix Status + +Implemented. + +Changed `src/spy/ui/src/api.ts` so initial browser range state is parsed as a +coherent `{ preset, since }` pair. A URL with `?since=` but no explicit +`preset=live` now starts as a fixed/custom range instead of being labeled live. + +Changed `src/spy/ui/src/App.tsx` so range changes update the browser URL: + +- `Live` writes `preset=live` and removes `since`. +- `10 min`, `1 hour`, `Today`, and `Custom` write both `preset` and the fixed + `since` timestamp used by the API query. + +Added regression coverage: + +- `src/spy/ui/src/api.test.ts` covers fixed `since` parsing, explicit live mode, + invalid URL values, and canonical range URL construction. +- `src/spy/ui/e2e/spy-ui.playwright.ts` covers loading `/?since=0` as non-live, + changing to `10 min`, preserving that state through reload, switching to + `Live`, and preserving live state through reload. + +Verification commands: + +- `bun run typecheck` +- `bun test src/spy/ui/src --timeout 10000` +- `bun run lint` +- `bun run build:spy-ui` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-09-RCA.md b/docs/bugfix/SPY-QA-09-RCA.md new file mode 100644 index 0000000..aa2ecbc --- /dev/null +++ b/docs/bugfix/SPY-QA-09-RCA.md @@ -0,0 +1,215 @@ +# SPY-QA-09 RCA: Relative Time Ranges Behave As Fixed Snapshots + +## Scope + +This RCA covers the highest-priority open spy bug I could prove in the current +tree after reading `PLAN.md`: `SPY-QA-09`. 
+ +Triage notes: + +- `SPY-QA-02` remains the only open P0 in `PLAN.md`, but it did not reproduce in + the current production-built UI. Lower inspector navigation and stream-event + loading kept the top-level `main` fixed at `scrollTop=0`. +- `SPY-QA-06` is the first open P1, but it also did not reproduce. Selecting a + different call from a deep inspector scroll position reset the inspector to + `scrollTop=0` and closed the opened detail sections. +- `SPY-QA-09` is the first open P1 with concrete current evidence. + +No implementation code has been changed for this RCA. + +## Reproduction Used + +- Built the current UI with `bun run build:spy-ui`. +- Started the fixture-backed spy UI service with: + `bun src/spy/ui/test-server.ts --port 0 --static dist/spy-ui` +- The service selected `http://127.0.0.1:28375`. +- Opened the production build at `http://127.0.0.1:28375/?since=0`. +- Used Playwright Chromium in the default `1280 x 720` viewport. + +The fixture server is enough to prove this bug because range state and refresh +behavior are browser-side UI state. They do not depend on live provider traffic. 
+ +## Higher-Priority No-Repro Proof + +After loading the app, selecting the first call, jumping to Health, opening +Stream Events, loading stream events, opening Raw Payloads, and jumping back to +Health: + +```json +{ + "viewport": { "width": 1280, "height": 720 }, + "main": { + "clientHeight": 720, + "scrollHeight": 720, + "scrollTop": 0, + "overflowY": "hidden", + "rect": { "top": 0, "bottom": 720, "height": 720 } + }, + "header": { "rect": { "top": 0, "bottom": 64, "height": 64 } }, + "aside": { + "clientHeight": 656, + "scrollHeight": 2303, + "scrollTop": 1647, + "overflowY": "auto", + "rect": { "top": 64, "bottom": 720, "height": 656 } + }, + "openDetails": [ + "spy-inspector-stream", + "spy-inspector-raw", + "spy-inspector-health" + ], + "rowCount": 5 +} +``` + +This is a no-repro for `SPY-QA-02`: the scroll movement stayed inside the +inspector, and the hidden top-level `main` did not scroll or push the global +header offscreen. + +After forcing the inspector to its deepest scroll position and selecting another +timeline call: + +```json +{ + "before": { + "aside": { "scrollTop": 1647, "scrollHeight": 2303, "clientHeight": 656 }, + "openDetails": [ + "spy-inspector-stream", + "spy-inspector-raw", + "spy-inspector-health" + ] + }, + "after": { + "main": { "scrollTop": 0, "scrollHeight": 720, "clientHeight": 720 }, + "header": { "rect": { "top": 0, "bottom": 64, "height": 64 } }, + "aside": { "scrollTop": 0, "scrollHeight": 1442, "clientHeight": 656 }, + "openDetails": [] + } +} +``` + +This is a no-repro for `SPY-QA-06`: the current build resets inspector scroll +and panel state when the selected call changes. 
+ +## Proof + +After clicking the `10 min` range button: + +```json +{ + "href": "http://127.0.0.1:28375/?since=1779579245&preset=10m", + "subtitle": "Since May 23, 07:34:05 PM", + "active": ["10 min"], + "since": 1779579245, + "now": 1779579845, + "ageSeconds": 600 +} +``` + +After waiting 3.2 seconds and clicking the in-app `Refresh calls` button: + +```json +{ + "href": "http://127.0.0.1:28375/?since=1779579245&preset=10m", + "subtitle": "Since May 23, 07:34:05 PM", + "active": ["10 min"], + "since": 1779579245, + "now": 1779579848, + "ageSeconds": 603 +} +``` + +This proves the current UI labels the selected range as `10 min` while using a +fixed `since` timestamp. Refreshing the app's call list does not advance the +window start. The query remains `preset=10m`, the active control remains +`10 min`, and the visible subtitle stays fixed even though the real window age +has moved past 10 minutes. + +The same source path handles `1 hour`, so the bug applies to both relative range +buttons listed in `PLAN.md`. + +## Source Evidence + +Relevant current code: + +- `src/spy/ui/src/format.ts:21-30` computes `10m` and `1h` as relative offsets + from the current clock only when `secondsForPreset()` is called. +- `src/spy/ui/src/App.tsx:366-373` calls `secondsForPreset()` only when the + operator clicks a preset button, then stores the resulting absolute `since`. +- `src/spy/ui/src/App.tsx:383-388` persists that absolute `since` into React + state, the custom datetime value, and the URL query. +- `src/spy/ui/src/App.tsx:493-495` makes `Refresh calls` call `loadCalls()` only. + It does not recompute `since` for relative presets. +- `src/spy/ui/src/App.tsx:160-190` sends the stored `since` value to the call + list API. +- `src/spy/ui/src/App.tsx:481-482` renders the subtitle from the stored `since`, + while the active range button still says `10 min` or `1 hour`. 
+- `src/spy/ui/src/api.ts:70-83` records the current absolute timestamp in the + URL as `preset=10m&since=<timestamp>` or + `preset=1h&since=<timestamp>`. + +## Root Cause + +`SPY-QA-09` is a browser range-state model bug. + +The UI represents a relative preset with the same state shape as a custom fixed +range: `{ preset, since }`. When the user selects `10 min` or `1 hour`, the app +immediately resolves the relative preset into an absolute timestamp and then +only stores that timestamp. Later refreshes have no branch that says "this is a +relative preset; recompute its start before querying." As a result, the label +continues to advertise a relative range while the query behaves like a fixed +snapshot. + +The URL model reinforces the same problem by storing both `preset=10m` and a +fixed `since`. Reloading can restore the old fixed timestamp while still making +the UI present the range as `10 min`. + +## Proposed Fix + +Fix `SPY-QA-09` by making relative and fixed range state explicit: + +- Treat `10m`, `1h`, and `today` as rolling presets. +- Keep `live` as a fixed viewer-session start until the operator explicitly + selects `Live` again. +- Treat `custom` as the only fixed user-selected timestamp range. +- Derive the API `since` timestamp from the active preset when loading calls, + refreshing, and handling `calls-changed` SSE updates. +- For `10m` and `1h`, recompute `since` before each refresh/load so the visible + range is actually rolling. +- Preserve URL shareability by recording `preset=10m` or `preset=1h` without + requiring a stale fixed `since`, or by ignoring stored `since` for dynamic + presets on load. +- Add unit coverage for dynamic range URL parsing and Playwright coverage that + selects `10 min`, advances time, clicks refresh, and verifies the query start + advances. + +`Today` can continue to resolve to the current local start of day; recomputing +it on refresh is harmless and handles midnight rollover correctly. + +## Fix Status + +Implemented on 2026-05-23. 
+ +Changed `src/spy/ui/src/api.ts` so dynamic preset URLs such as `preset=10m`, +`preset=1h`, and `preset=today` resolve from the current clock instead of a +stale stored `since` value. The URL writer now removes `since` for those dynamic +presets and keeps `since` only for `custom`. + +Changed `src/spy/ui/src/App.tsx` so non-paginated call loads recompute `since` +for rolling presets before querying the API. Pagination keeps the current +window start so cursor queries do not mix windows. `Live from now` remains a +fixed viewer-session range until the operator clicks `Live` again. + +Added regression coverage: + +- `src/spy/ui/src/api.test.ts` verifies dynamic preset parsing, current-clock + `since` resolution, and canonical URLs without stale dynamic `since` values. +- `src/spy/ui/e2e/spy-ui.playwright.ts` verifies that refreshing `10 min` + advances the API `since` parameter while preserving `preset=10m` in the URL. + +Verification commands: + +- `bun test src/spy/ui/src --timeout 10000` +- `bun run typecheck` +- `bun run test:spy-ui:e2e` +- `bun run lint` diff --git a/docs/bugfix/SPY-QA-10-RCA.md b/docs/bugfix/SPY-QA-10-RCA.md new file mode 100644 index 0000000..49809aa --- /dev/null +++ b/docs/bugfix/SPY-QA-10-RCA.md @@ -0,0 +1,205 @@ +# SPY-QA-10 RCA: Health Is Hidden When Timeline Filters Return No Calls + +## Scope + +This RCA covers the highest-priority open spy bug I could prove in the current +tree after reading `PLAN.md`: `SPY-QA-10`. + +Triage notes: + +- `SPY-QA-02` remains the only open P0 in `PLAN.md`, but it still does not + reproduce in the current production-built UI. +- `SPY-QA-10` is the first open P1 with concrete current evidence. + +This RCA was written before implementation. The fix status below now documents +the implemented change and verification. + +## Reproduction Used + +- Built the current UI with `bun run build:spy-ui`. 
+- Started the fixture-backed spy UI service with: + `bun src/spy/ui/test-server.ts --port 4680 --static dist/spy-ui` +- Opened the production build at `http://127.0.0.1:4680/?since=0`. +- Used the Codex in-app browser in the default `1280 x 720` viewport. +- Verified the fixture service had 5 completed provider calls and 0 pending + calls. + +The fixture server is enough to prove this bug because the failure is in browser +selection and inspector rendering. It does not depend on live provider traffic. + +## Higher-Priority No-Repro Proof + +After loading the app, selecting the first call, jumping to Health, jumping to +Stream Events, loading stream events, jumping to Raw Payloads, and jumping back +to Health: + +```json +{ + "viewport": { "width": 1280, "height": 720 }, + "main": { + "clientHeight": 720, + "scrollHeight": 720, + "scrollTop": 0, + "overflowY": "hidden", + "rect": { "top": 0, "bottom": 720, "height": 720 } + }, + "header": { "rect": { "top": 0, "bottom": 64, "height": 64 } }, + "aside": { + "clientHeight": 656, + "scrollHeight": 2318, + "scrollTop": 1662, + "overflowY": "auto", + "rect": { "top": 64, "bottom": 720, "height": 656 } + }, + "openDetails": [ + "spy-inspector-stream", + "spy-inspector-raw", + "spy-inspector-health" + ], + "rowCount": 5 +} +``` + +This is a no-repro for `SPY-QA-02`: lower inspector navigation and stream-event +loading kept top-level `main.scrollTop=0`, and the global header stayed pinned at +`y=0`. + +## Proof + +With the same loaded UI, I changed the Status filter to `Pending`. The fixture +data has no pending calls, so the call list became empty. 
+ +Browser measurements immediately after selecting `Pending`: + +```json +{ + "statusFilterValue": "pending", + "statusFilterText": "Pending", + "timelineRowCount": 0, + "timelineEmptyText": "No provider calls in this range.", + "inspectorHeading": "Call Inspector", + "inspectorSubtext": "Select a provider call.", + "inspectorEmptyText": "Select a timeline row to inspect the provider call.", + "inspectorNavCount": 1, + "healthSectionCount": 0, + "healthTextVisible": false +} +``` + +Key observations: + +- The active filter is definitely `Pending`. +- The timeline has no rows. +- The inspector has been replaced by the empty call-selection state. +- The health section is not in the DOM (`healthSectionCount: 0`). +- Health labels such as `Dropped captures`, `Last ingest`, and `Schema` are not + visible. +- A stale inspector section nav can remain, but its `Health` target no longer + exists. + +The service health API is still valid at the same time: + +```json +{ + "ok": true, + "service": { + "enabled": true, + "bind": "127.0.0.1", + "port": 4680, + "retentionDays": 7, + "maxBytes": 6442450944, + "spoolMaxBytes": 1073741824, + "storeRaw": false, + "staticAssets": true + }, + "store": { + "schemaVersion": 2, + "dbSizeBytes": 274432, + "dbUsedBytes": 274432, + "spoolSizeBytes": 0, + "providerCallCount": 5, + "pendingCallCount": 0, + "droppedCaptureCount": 0, + "lastIngestAt": 1779581016.454 + } +} +``` + +This proves the health data exists and the backend is healthy, but the UI makes +that data unreachable when no timeline call is selected. + +## Source Evidence + +Relevant current code: + +- `PLAN.md:895-897` defines `SPY-QA-10`: keep service Health reachable + independently of selected calls. +- `src/spy/ui/src/App.tsx:176-184` chooses `page.items[0]?.call.id` as the + selected call after a non-append load. When a filter returns no calls, that + resolves to `undefined`. 
+- `src/spy/ui/src/App.tsx:300-302` derives `selectedSummary` only by finding the + selected id in the current visible call page. +- `src/spy/ui/src/App.tsx:876-877` renders `EmptyInspector` whenever + `props.summary === null`. +- `src/spy/ui/src/App.tsx:1008-1013` renders the Health section only inside + `InspectorContent`, which is only used for a loaded selected call. +- `src/spy/ui/src/App.tsx:1516-1536` has the independent health panel data, but + there is no selected-call-independent route to render it. +- `src/spy/ui/src/App.tsx:211-219` and `src/spy/ui/src/api.ts:170-173` already + fetch `/api/health` independently of call detail, so the problem is not API + availability. + +## Root Cause + +`SPY-QA-10` is a browser composition bug. + +The app stores service health independently, but the only full health display is +nested inside the selected-call inspector content. When filters or search return +an empty page, `loadCalls()` clears the effective selected call by resolving the +next selection to `undefined`. That makes `selectedSummary` become `null`, and +`CallInspector` renders the empty call-selection state instead of +`InspectorContent`. Because the Health section is inside `InspectorContent`, it +is removed along with the call detail sections. + +This couples service health to call selection even though `/api/health` is +call-independent. + +## Proposed Fix + +Fix `SPY-QA-10` by giving service health a selected-call-independent render path: + +- Keep the call-native inspector behavior for selected call details. +- When no call is selected, render a useful inspector empty state that includes + the same service health panel or a dedicated compact health/status panel. +- Hide the inspector section nav unless its target sections actually exist, or + include a Health target that exists in the no-call state. +- Preserve current selected-call health rendering so operators can still see + health while inspecting a call. 
+- Add Playwright coverage for an empty filter result that verifies `/api/health` + data remains visible in the inspector. + +Expected proof after the fix: + +- Applying a `Pending` filter with zero pending calls should still show service + health values such as Enabled, DB size, Spool size, Calls, Pending, and Schema. +- The no-call inspector should not expose dead section navigation targets. +- `/api/health` and visible health values should agree. + +## Fix Status + +Implemented on 2026-05-23. + +Changed `src/spy/ui/src/App.tsx` so the no-call inspector state renders a +selected-call-independent Service Health panel using the existing health data. +The inspector section navigator now requires an actual selected call, so empty +filter/search results no longer expose dead call-section targets. + +Added Playwright coverage in `src/spy/ui/e2e/spy-ui.playwright.ts` for applying +a `Pending` filter with zero matching calls and verifying that service health +remains visible in the inspector. + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-11-RCA.md b/docs/bugfix/SPY-QA-11-RCA.md new file mode 100644 index 0000000..54910e0 --- /dev/null +++ b/docs/bugfix/SPY-QA-11-RCA.md @@ -0,0 +1,163 @@ +# SPY-QA-11 RCA: Empty Timeline Copy Ignores Active Filters + +## Scope + +This RCA covers the highest-priority open spy bug I could prove in the current +tree after reading `PLAN.md`: `SPY-QA-11`. + +Triage notes: + +- `PLAN.md` marks `SPY-QA-01` through `SPY-QA-10` complete. +- `SPY-QA-11` is the first unchecked item in the prioritized handoff, and it is + a P1 issue. +- This document was initially written before implementation. The fix status + below records the later code changes. + +## Reproduction Used + +I rebuilt the current spy UI and ran the existing fixture-backed Playwright +coverage against the production artifact. 
+ +Targeted proof commands: + +```sh +bun run build:spy-ui +./node_modules/.bin/playwright test -c src/spy/ui/playwright.config.ts -g "keeps service health visible" +``` + +Result: + +```text +bun run build:spy-ui +✓ built in 117ms + +./node_modules/.bin/playwright test -c src/spy/ui/playwright.config.ts -g "keeps service health visible" +1 passed +``` + +The first sandboxed attempt failed before running the test because the local +test server could not bind its localhost port. I reran the same targeted +Playwright command with localhost/browser permission, and the test completed +successfully. + +## Proof + +The passing Playwright test proves the current bad copy is present in a filtered +empty result state: + +- The fixture-backed UI starts with 5 visible provider-call rows. +- The test selects the `Pending` status filter. +- The visible timeline row count becomes 0. +- The same test then expects and finds the text: + `No provider calls in this range.` + +That is the exact failure mode described in `PLAN.md`: there are calls in the +range, but active filters exclude them, and the UI claims the range has no +provider calls. + +This existing test currently locks in the incorrect copy: + +```ts +await expect(page.getByTestId("timeline-row")).toHaveCount(5); +await page.getByLabel("Filter by status").selectOption("pending"); +await expect(page.getByTestId("timeline-row")).toHaveCount(0); +await expect(page.getByText("No provider calls in this range.")).toBeVisible(); +``` + +There is a second filtered/search empty-state test with the same assertion after +choosing a mismatched operation filter and after adding a search term while that +filter is still active. + +## Source Evidence + +Relevant current code: + +- `PLAN.md:903-905` defines `SPY-QA-11`: a `Pending` filter can produce the + copy `No provider calls in this range` even though filters excluded calls. 
+- `src/spy/ui/src/App.tsx:128-134` stores provider, model, operation, status, + and block-kind filters in React state. +- `src/spy/ui/src/App.tsx:163-171` sends `search`, provider, model, operation, + and status to the API when loading timeline calls. +- `src/spy/ui/src/App.tsx:528-540` passes the filter state into + `TimelineControls`, so the controls know which filters are active. +- `src/spy/ui/src/App.tsx:554-563` renders `Timeline` with only `calls`, + selection, loading, pagination, and callbacks. It does not pass `search`, + filter state, or any unfiltered range count into `Timeline`. +- `src/spy/ui/src/App.tsx:742-749` confirms the `Timeline` props have no + filter/search context. +- `src/spy/ui/src/App.tsx:759-763` hard-codes the only empty-state message to + `No provider calls in this range.` whenever `calls.length === 0` and loading + is false. +- `src/spy/ui/e2e/spy-ui.playwright.ts:328-335` proves the filtered empty state + with 5 initial rows, a `Pending` status filter, 0 resulting rows, and the + range-only empty copy. +- `src/spy/ui/e2e/spy-ui.playwright.ts:314-324` similarly proves the same copy + after an operation filter and search return no rows. + +## Root Cause + +`SPY-QA-11` is a browser presentation-state bug. + +The app correctly tracks active search/filter inputs and sends them to the API, +but the timeline empty-state renderer is isolated from that context. `Timeline` +receives the already-filtered page of calls and a loading flag. When the page is +empty, it has no way to distinguish these cases: + +- There are no provider calls in the selected time range. +- There are provider calls in the selected time range, but active filters or + search excluded them. + +Because the component cannot tell those cases apart, it always renders the +range-only message. The current e2e tests assert that behavior, so the test suite +does not protect the intended UX yet. 
+ +## Proposed Fix + +Fix `SPY-QA-11` by making the timeline empty state aware of the active query +context: + +- Compute whether timeline query constraints are active in `App` from submitted + search plus provider/model/operation/status filters. +- Pass a small empty-state descriptor into `Timeline`, rather than making + `Timeline` infer app-level state. +- Keep the existing `No provider calls in this range.` copy only when no + search/filter constraints are active. +- When constraints are active, render copy that says the current filters/search + excluded calls in the selected range. +- Include the active constraints in concise supporting copy if it can be done + without turning this into `SPY-QA-12` or `SPY-QA-13`. +- Update Playwright coverage so the `Pending` filtered-empty case expects the + filter-aware copy, while the clear-data/no-range-data case still expects the + range-only copy. + +Expected proof after the fix: + +- Applying a `Pending` filter when the range has completed calls but no pending + calls should no longer show `No provider calls in this range.` +- Clearing data, where the range truly has no calls, should still show the + range-only empty-state copy. +- Existing health visibility coverage from `SPY-QA-10` should remain intact. + +## Fix Status + +Implemented on 2026-05-24. + +Changed `src/spy/ui/src/App.tsx` so `App` derives whether the timeline query is +constrained by submitted search text or provider/model/operation/status filters, +then passes that empty-state descriptor into `Timeline`. `Timeline` now keeps +`No provider calls in this range.` only for unconstrained range-empty states and +uses `No provider calls match the current search or filters.` when active query +constraints produce an empty list. + +Updated `src/spy/ui/e2e/spy-ui.playwright.ts` so filtered/search empty states +expect the query-aware copy and assert the range-only copy is absent. 
The +clear-data test still expects the range-only copy, proving true range-empty +states were preserved. + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run build:spy-ui` +- `bun run test:spy-ui:unit` +- `./node_modules/.bin/playwright test -c src/spy/ui/playwright.config.ts` diff --git a/docs/bugfix/SPY-QA-14-RCA.md b/docs/bugfix/SPY-QA-14-RCA.md new file mode 100644 index 0000000..fa4ba06 --- /dev/null +++ b/docs/bugfix/SPY-QA-14-RCA.md @@ -0,0 +1,201 @@ +# SPY-QA-14 RCA: Search Scope Excludes Visible Call And Model Identifiers + +## Scope + +This RCA covers the highest-priority open spy bug I could prove in the current +tree after reading `PLAN.md`: `SPY-QA-14`. + +Triage notes: + +- `PLAN.md` marks `SPY-QA-01` through `SPY-QA-13` complete or closed. +- `SPY-QA-14` is the first unchecked item in the prioritized handoff, and it is + a P1 issue. +- This document was initially written before implementation. The fix status + below records the later code changes. + +## Reproduction Used + +I rebuilt the current spy UI and ran the fixture-backed service: + +```sh +bun run build:spy-ui +bun src/spy/ui/test-server.ts --port 0 --static dist/spy-ui +``` + +The sandboxed server start failed on localhost bind, so I reran the same server +command with localhost permission. The server started at: + +```text +rootcell spy UI test server listening on http://127.0.0.1:39845 +``` + +## Browser Proof + +I launched headless Chromium against the production-built UI and captured the +current DOM behavior. 
Result: + +```json +{ + "initialRows": 5, + "searchPlaceholder": "Search text", + "searchAriaLabel": "Search normalized text", + "firstRowAriaLabel": "Open call call-fixture-flow-tool-result", + "visibleInspectorCallId": "call-fixture-flow-tool-result", + "rowsAfterCallIdSearch": 0, + "emptyTextAfterCallIdSearch": "No provider calls match the current search or filters.", + "rowsAfterModelFragmentSearch": 0, + "emptyTextAfterModelSearch": "No provider calls match the current search or filters.", + "rowsAfterNormalizedTextSearch": 5 +} +``` + +This proves the bug as currently shipped: + +- The UI exposes `call-fixture-flow-tool-result` as a call identifier. +- The search placeholder only says `Search text`. +- Searching the visible call id returns zero rows. +- Searching the visible model fragment `sonnet` returns zero rows. +- Searching normalized body text, `Fixture capture`, returns five rows. + +## API Proof + +The same fixture service proves the backend scope mismatch. + +Baseline calls include visible searchable-looking identifiers: + +```sh +curl -sS 'http://127.0.0.1:39845/api/calls?since=0&limit=10' +``` + +The response includes five calls, including: + +```json +{ + "id": "call-fixture-flow-tool-result", + "model_id": "us.anthropic.claude-sonnet-4-6", + "request_flow_id": "fixture-flow-tool-result", + "response_flow_id": "fixture-flow-tool-result" +} +``` + +But searching those visible fields returns no matches: + +```sh +curl -sS 'http://127.0.0.1:39845/api/search?since=0&limit=10&q=call-fixture-flow-tool-result' +``` + +```json +{"items":[]} +``` + +```sh +curl -sS 'http://127.0.0.1:39845/api/search?since=0&limit=10&q=sonnet' +``` + +```json +{"items":[]} +``` + +Normalized text search still works: + +```sh +curl -sS 'http://127.0.0.1:39845/api/search?since=0&limit=10&q=Fixture%20capture' +``` + +That response returns all five calls. 
+ +## Source Evidence + +Relevant current code: + +- `PLAN.md:924-926` defines `SPY-QA-14`: search scope must clarify whether it + includes call ids, model ids, or metadata. +- `PLAN.md:995-997` records the specific evidence note: search scopes to + normalized block text and call-id fragments return no results while the + placeholder says `Search text`. +- `src/spy/ui/src/App.tsx:659-663` renders the search input with + `aria-label="Search normalized text"` but visible placeholder `Search text`. +- `src/spy/ui/src/App.tsx:829` exposes timeline row call ids in accessible names. +- `src/spy/ui/src/App.tsx:837` shows the shortened model id in timeline rows. +- `src/spy/ui/src/App.tsx:914-918` shows the shortened model id and call id in + the inspector header. +- `src/spy/ui/src/api.ts:135-137` sends any submitted search string to + `/api/search?q=...`. +- `src/spy/store.ts:548-580` implements search by matching only + `normalized_block_fts` rows joined back to provider calls. +- `src/spy/migrations.ts:82-83` defines `normalized_block_fts` with only + `block_id` and normalized block `text`. +- `src/spy/migrations.ts:148-153` populates the FTS table only from + `normalized_block.text`. + +## Root Cause + +`SPY-QA-14` is a search contract and presentation mismatch. + +The service implements search as normalized block text search only. It does not +index or query `provider_call` identifiers such as `id`, `model_id`, +`request_flow_id`, or `response_flow_id`. The browser then presents visible and +accessible identifiers next to a generic visible placeholder, `Search text`, +without telling the operator that those identifiers are outside the search +scope. + +That combination makes visible operational keys look searchable even though the +backend cannot match them. 
+ +## Proposed Fix + +Fix `SPY-QA-14` by making search scope explicit and useful for visible +identifiers: + +- Extend `/api/search` to include exact or substring matches for provider-call + metadata that the UI already exposes: call id, request flow id, response flow + id, model id, provider, operation, and status. +- Preserve the existing normalized-block FTS behavior for prompt/response text. +- Deduplicate calls when both normalized text and metadata match. +- Update the visible placeholder and accessible label to name the real scope, + for example `Search text, call ID, or model`. +- Add unit coverage at the store/API-helper layer proving call id and model id + searches return the expected call. +- Add Playwright coverage proving a visible call id and visible model fragment + both return timeline rows, while normalized text search continues to work. + +Expected proof after the fix: + +- `q=call-fixture-flow-tool-result` returns the matching call. +- `q=sonnet` returns the fixture calls with model + `us.anthropic.claude-sonnet-4-6`. +- `q=Fixture capture` still returns normalized-text matches. +- The search control no longer exposes the ambiguous visible placeholder + `Search text`. + +## Fix Status + +Implemented on 2026-05-24. + +Changed `src/spy/store.ts` so `/api/search` matches both normalized block FTS +text and visible provider-call metadata: call id, request flow id, response flow +id, model id, provider, operation, and status. Metadata matches are unioned with +normalized text matches and deduplicated before the existing time/provider/ +model/operation/status filters and pagination are applied. + +Changed `src/spy/ui/src/App.tsx` so the search input now labels the real scope +as `Search text, call ID, or model`. + +Added regression coverage: + +- `src/spy/store.test.ts` proves call id, flow id, model fragment, normalized + text, and filtered metadata searches. 
+- `src/spy/service.test.ts` proves the HTTP `/api/search` endpoint returns + matches for a call id and model fragment. +- `src/spy/ui/e2e/spy-ui.playwright.ts` proves the production UI can search a + visible call id, visible model fragment, and normalized text. + +Verification commands: + +- `bun test src/spy/store.test.ts --timeout 10000` +- `bun test src/spy/service.test.ts --timeout 10000` +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy-ui:unit` +- `bun run test` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-17-RCA.md b/docs/bugfix/SPY-QA-17-RCA.md new file mode 100644 index 0000000..8dd3da2 --- /dev/null +++ b/docs/bugfix/SPY-QA-17-RCA.md @@ -0,0 +1,210 @@ +# SPY-QA-17 RCA: Diff Baseline Scope Is Implicit + +## Scope + +This RCA covers the highest-priority open spy bug I could prove in the current +tree after reading `PLAN.md`: `SPY-QA-17`. + +Triage notes: + +- `PLAN.md` marks all P0 spy bugs closed. +- `PLAN.md` marks `SPY-QA-01` through `SPY-QA-16` complete or closed. +- `SPY-QA-17` is the first unchecked item in the prioritized handoff, and it is + a P1 issue. +- This document was initially written before implementation. The fix status + below records the later code changes. + +## Reproduction Used + +I used the existing sanitized Bedrock/Pi fixture pair and the real spy store +code to create three comparable provider calls: + +- `call-qa17-old` at `started_at=1000` +- `call-qa17-mid` at `started_at=1100` +- `call-qa17-visible` at `started_at=1200` + +Then I loaded the same timeline range behavior the UI uses by asking the store +for calls with `since=1150`. That range can only show `call-qa17-visible`. 
+ +The one-off command was equivalent to this script: + +```sh +bun --eval ' +import { mkdtempSync, rmSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { openSpyStore } from "./src/spy/store.ts"; +import { + SpoolEventSchema, + SpoolRequestEventSchema, + SpoolResponseEventSchema, +} from "./src/spy/schemas.ts"; + +const fixturePath = new URL("./src/spy/fixtures/bedrock-pi-us-sonnet-4-6.ndjson", import.meta.url); +const events = readFileSync(fixturePath, "utf8") + .trim() + .split("\n") + .map((line) => SpoolEventSchema.parse(JSON.parse(line))); +const baseRequest = events.find((event) => event.direction === "request" && event.flow_id === "fixture-flow-simple"); +const baseResponse = events.find((event) => event.direction === "response" && event.flow_id === "fixture-flow-simple"); +if (!baseRequest || !baseResponse) { + throw new Error("missing simple fixture pair"); +} + +const root = mkdtempSync(join(tmpdir(), "rootcell-spy-qa17-")); +const store = openSpyStore({ dbPath: join(root, "spy.sqlite"), spoolDir: join(root, "spool") }); +try { + for (const [flowId, ts] of [["qa17-old", 1000], ["qa17-mid", 1100], ["qa17-visible", 1200]]) { + store.persistRequest(SpoolRequestEventSchema.parse({ ...baseRequest, flow_id: flowId, ts })); + store.persistResponse(SpoolResponseEventSchema.parse({ ...baseResponse, flow_id: flowId, ts: ts + 1 })); + } + const visible = store.listCallSummaries({ since: 1150, limit: 10 }); + const diff = store.getCallDiff("call-qa17-visible"); + console.log(JSON.stringify({ + visibleRangeSince: 1150, + visibleCallIds: visible.items.map((item) => item.call.id), + visibleCallStartedAts: visible.items.map((item) => item.call.started_at), + diffCallId: diff?.call.call.id ?? null, + diffPreviousCallId: diff?.previousCall?.call.id ?? null, + diffPreviousStartedAt: diff?.previousCall?.call.started_at ?? null, + previousIsOutsideVisibleRange: diff?.previousCall + ? 
diff.previousCall.call.started_at < 1150 + : null, + previousIsInVisibleRows: diff?.previousCall + ? visible.items.some((item) => item.call.id === diff.previousCall.call.id) + : null, + diffClassificationCounts: diff?.blocks.reduce((acc, entry) => { + acc[entry.classification] = (acc[entry.classification] ?? 0) + 1; + return acc; + }, {}) ?? null, + }, null, 2)); +} finally { + store.close(); + rmSync(root, { recursive: true, force: true }); +} +' +``` + +The command used `openSpyStore`, `persistRequest`, `persistResponse`, +`listCallSummaries`, and `getCallDiff` from the current implementation. It did +not patch or mock the diff query. + +## Store Proof + +The reproduction output was: + +```json +{ + "visibleRangeSince": 1150, + "visibleCallIds": [ + "call-qa17-visible" + ], + "visibleCallStartedAts": [ + 1200 + ], + "diffCallId": "call-qa17-visible", + "diffPreviousCallId": "call-qa17-mid", + "diffPreviousStartedAt": 1100, + "previousIsOutsideVisibleRange": true, + "previousIsInVisibleRows": false, + "diffClassificationCounts": { + "repeated": 10 + } +} +``` + +This proved the pre-fix behavior: + +- The visible timeline range contains only `call-qa17-visible`. +- The diff baseline for that visible call is `call-qa17-mid`. +- `call-qa17-mid` started before the visible range and is not in the visible + timeline rows. +- The diff still classifies blocks against that hidden baseline. + +## Pre-Fix Source Evidence + +Relevant code before the fix: + +- `PLAN.md:940-941` defines `SPY-QA-17`: live/ranged views can diff against a + prior request outside the visible range without saying so. +- `src/spy/store.ts:414-437` applies the active `since`, provider, model, + operation, status, cursor, and limit filters when listing timeline calls. +- `src/spy/store.ts:470-487` implements `getCallDiff(callId)` by selecting the + previous call with the same provider, model, and operation before the current + call. 
It does not accept or apply active timeline range/search/status filters. +- `src/spy/service.ts:200-207` exposes `GET /api/calls/:id/diff` with only the + call id. There is no query parameter for the current visible range or filter + context. +- `src/spy/ui/src/App.tsx:335-338` loads call detail and diff by call id only. + The active `since`, search, and filter state are not sent with the diff + request. +- Before the fix, `DiffPanel` rendered only + `Previous comparable request: <call id> · <timestamp>`. It did not explain that + the baseline was global to all stored comparable calls, nor whether it was + outside the current visible range. + +## Root Cause + +`SPY-QA-17` is not caused by incorrect diff matching. The store deliberately +computes a previous-comparable-request baseline across all persisted calls with +the same provider, model, and operation. + +The bug is that the browser presents that global baseline inside a ranged or +live timeline without stating its scope. The timeline is range/filter scoped, +but the diff baseline is not. Because the diff endpoint has no range/filter +context and the UI does not derive or display baseline visibility, the operator +can reasonably assume the previous request came from the visible timeline when +it may have come from older hidden capture data. + +## Proposed Fix + +Keep the current global previous-comparable-request behavior, but make the scope +explicit in the inspector: + +- Pass the active visible `since` value to the diff panel. +- In the Diff section, label the baseline as global to stored comparable calls, + not scoped to the visible timeline. +- When `previousCall.started_at < since`, add an explicit note or badge such as + `outside current range`. +- For Live mode, phrase the note as outside the current Live window instead of + implying the call is missing by mistake. 
+- Add Playwright coverage using a controlled diff response where the selected + visible call has a previous baseline older than the page's active `since` + value. The test should fail if the Diff section only shows the previous id and + timestamp without the outside-range scope text. + +Expected proof after the fix: + +- A ranged/live view still shows accurate repeated/new/changed counts. +- The same hidden-baseline scenario displays that the previous comparable + request is outside the current range. +- Existing diff API behavior remains compatible because the fix can be made in + the browser using data already present in `SpyCallDiff.previousCall`. + +## Fix Status + +Implemented on 2026-05-24. + +Changed `src/spy/ui/src/App.tsx` so the inspector passes the active timeline +range into the Diff section. The Diff section now labels the previous request +as a global baseline across stored comparable calls, and when that previous +request started before the active range it shows an explicit `outside current +range` or `outside current Live window` badge. + +The diff API and store behavior are unchanged: `GET /api/calls/:id/diff` still +compares against the previous stored call with the same provider, model, and +operation. The fix makes that global scope visible in the browser. + +Added Playwright regression coverage in `src/spy/ui/e2e/spy-ui.playwright.ts`. +The new test routes a visible call at `started_at=2100` with an active +`since=2000`, then returns a diff baseline at `started_at=1900`. It verifies +that the Diff section shows the previous call id, the `outside current range` +badge, and the global-baseline explanation. 
+ +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy-ui:unit` +- `bun run test:spy-ui:e2e` with localhost/browser permission +- `git diff --check` diff --git a/docs/bugfix/SPY-QA-18-RCA.md b/docs/bugfix/SPY-QA-18-RCA.md new file mode 100644 index 0000000..21cb81b --- /dev/null +++ b/docs/bugfix/SPY-QA-18-RCA.md @@ -0,0 +1,200 @@ +# SPY-QA-18 RCA: Timeline Hides Provider Cache Read/Write + +## Scope + +This RCA covers the highest-priority open spy bug in `PLAN.md`: `SPY-QA-18`. + +Triage notes: + +- `PLAN.md` marks all P0 spy bugs closed. +- `PLAN.md` marks `SPY-QA-01` through `SPY-QA-17` complete or closed. +- `SPY-QA-18` is the first unchecked item in the prioritized handoff, and it is + a P1 issue. +- This document was written before any product-code fix for `SPY-QA-18`. + +## Reproduction Used + +I used the production-built spy UI and a headless Chromium run with mocked API +responses matching the real cache-heavy shape recorded in `PLAN.md`. + +Commands used: + +- `bun run build:spy-ui` +- `python3 -m http.server 4691 --bind 127.0.0.1 --directory dist/spy-ui` +- A one-off Playwright script opened + `http://127.0.0.1:4691/?preset=today`, fulfilled `/api/*` with a single + complete Bedrock call, and read the timeline row plus inspector text. 
+ +The mocked call summary intentionally contained both provider cache usage and +request cache markers: + +```json +{ + "usage": { + "inputTokens": 10, + "outputTokens": 98, + "cacheReadTokens": 5200, + "cacheWriteTokens": 81, + "totalTokens": 5389 + }, + "cacheMarkerCount": 2, + "requestBlockCount": 26, + "responseBlockCount": 3 +} +``` + +## Proof + +Runtime browser output from the current UI: + +```json +{ + "apiSummaryUsage": { + "inputTokens": 10, + "outputTokens": 98, + "cacheReadTokens": 5200, + "cacheWriteTokens": 81, + "totalTokens": 5389 + }, + "apiSummaryCacheMarkerCount": 2, + "timelineRowText": "claude-haiku-4-5-20251001-v1:0\ncomplete\ncache 2\n09:25:03 PM\ninput 18 KiB\noutput 1.2 KiB\nusage 5.4k tok\nduration 1.4 s\nconverse-stream \u00b7 26 request blocks \u00b7 3 response blocks", + "usageSectionText": "Usage Records\ninput\n10\noutput\n98\ncache read\n5,200\ncache write\n81\ntotal\n5,389", + "requestCompositionProviderUsageLines": [ + "Provider usage", + "5.4k tok", + "in 10 \u00b7 out 98 \u00b7 cache 5,200/81" + ], + "timelineContainsCacheReadLabelOrValue": false, + "timelineContainsCacheWriteLabelOrValue": false +} +``` + +Key observations: + +- The API summary had `cacheReadTokens=5200` and `cacheWriteTokens=81`. +- The inspector Usage Records panel showed `cache read 5,200` and + `cache write 81`. +- The Request Composition panel also showed `cache 5,200/81`. +- The timeline row did not include the cache read/write labels or values. +- The only cache-looking timeline text was `cache 2`, which is the request + cache marker count, not provider cache read/write usage. + +This reproduces the exact bug described in `PLAN.md`: cache-read and +cache-write calls look nearly identical from the row alone, and the `cache 2` +badge can be mistaken for provider cache accounting. 
+ +## Pre-Fix Source Evidence + +The data is already present in the browser API contract: + +- `src/spy/api-contracts.ts:65-70` defines `SpyUsageSummary` with + `cacheReadTokens` and `cacheWriteTokens`. +- `src/spy/api-contracts.ts:73-82` includes that usage summary and + `cacheMarkerCount` on each timeline call summary. + +The store already computes those fields: + +- `src/spy/store.ts:982-996` returns `usage` and `cacheMarkerCount` in + `callSummaryForRow`. +- `src/spy/store.ts:999-1016` sums `cache_read_tokens` and + `cache_write_tokens` into the summary usage object. + +The current timeline row drops the provider cache breakdown: + +- `src/spy/ui/src/App.tsx:838-842` renders + `cache {summary.cacheMarkerCount}` as a badge. +- `src/spy/ui/src/App.tsx:844-848` renders `usage` with + `formatUsageTotal(summary.usage)`, which only exposes total tokens in the + row. + +Other inspector surfaces prove the UI can display the same data when it chooses +to: + +- `src/spy/ui/src/App.tsx:1168-1172` shows provider usage in Request + Composition. +- `src/spy/ui/src/App.tsx:1215-1220` formats that composition detail with + input, output, and `cache <read>/<write>` values. +- `src/spy/ui/src/App.tsx:1344-1356` renders Usage Records with explicit + `cache read` and `cache write` cells. + +## Root Cause + +`SPY-QA-18` is a browser timeline rendering bug, not an ingestion, store, or API +bug. + +Two different cache concepts reach the row: + +- `summary.cacheMarkerCount`: the number of request cache marker blocks. +- `summary.usage.cacheReadTokens` and `summary.usage.cacheWriteTokens`: provider + reported cache read/write token counts. + +The timeline renders only the marker count as `cache N` and the provider usage +as total tokens. Because `formatUsageTotal(summary.usage)` discards the usage +breakdown, cache-heavy calls cannot be distinguished from ordinary calls in the +timeline. 
The `cache N` label then makes the row ambiguous because it looks like +provider cache accounting while actually meaning request marker count. + +## Proposed Fix + +Fix the timeline row display while keeping the existing API/store shape: + +- Remove the marker badge from the high-level timeline row. +- Replace total provider usage with separate read, write, cache-read, and + cache-write token classes. +- Add Playwright coverage with a synthetic cache-heavy call where the row must + expose all four values and omit total `tok` usage plus marker-count text. + +Expected proof after the fix: + +- The same mocked call shows provider cache read/write in the timeline row. +- The marker count is absent from the high-level row. +- Inspector Usage Records and Request Composition continue to show the same + values. + +## Fix Status + +Implemented on 2026-05-24. + +Changed `src/spy/ui/src/App.tsx` so timeline rows now show provider usage as +four separate token classes: + +- `read` from `summary.usage.inputTokens` +- `write` from `summary.usage.outputTokens` +- `cache read` from `summary.usage.cacheReadTokens` +- `cache write` from `summary.usage.cacheWriteTokens` + +The row no longer renders the request cache-marker count badge, and it no +longer renders total provider usage as a combined `tok` value. Request bytes, +response bytes, duration, operation, and block counts remain in the row metadata +line. + +Added Playwright regression coverage in `src/spy/ui/e2e/spy-ui.playwright.ts` +using a synthetic cache-heavy call with `read=10`, `write=98`, +`cache read=5200`, `cache write=81`, and `cacheMarkerCount=2`. The test proves +that the timeline row shows all four provider token classes and does not show +`usage`, `tok`, or `cache 2`. + +Post-fix browser smoke against the built fixture UI showed the first row as: + +```text +read +1,253 +write +8 +cache read +- +cache write +- +converse-stream ... input 2.6 KiB ... output 217 B ... 
1.0 s +``` + +The same browser check confirmed `tok=false` and ambiguous cache-marker text +matching `cache <N>` was absent. + +Verification commands: + +- `bun run typecheck` +- `bun run lint` +- `bun run test:spy-ui:unit` +- `bunx playwright test -c src/spy/ui/playwright.config.ts -g "shows provider cache token classes"` +- `bun run test:spy-ui:e2e` diff --git a/docs/bugfix/SPY-QA-19-RCA.md b/docs/bugfix/SPY-QA-19-RCA.md new file mode 100644 index 0000000..953f4c2 --- /dev/null +++ b/docs/bugfix/SPY-QA-19-RCA.md @@ -0,0 +1,240 @@ +# SPY-QA-19 RCA: Bedrock Reasoning Content Is Classified As Unknown + +## Scope + +This RCA covers the highest-priority open spy bug in `PLAN.md`: `SPY-QA-19`. + +Triage notes: + +- `PLAN.md` marks all P0 spy bugs closed. +- `PLAN.md` marks `SPY-QA-01` through `SPY-QA-18` complete or closed. +- `SPY-QA-19` is the first unchecked item in the prioritized handoff, and it is + a P1 issue. +- This document was written before any product-code fix for `SPY-QA-19`. + +## Bug Definition + +`PLAN.md:955-957` defines the current bug: + +```text +[P1] SPY-QA-19: Fix Bedrock reasoning classification. Prior-history +`reasoningContent` and signature-only reasoning chunks show as `Unknown` +instead of thinking/reasoning metadata. +``` + +## Reproduction Used + +I used the current Bedrock normalizer directly with two synthetic but schema-shaped +Bedrock Converse captures: + +- A request whose prior assistant history contains + `content[].reasoningContent.reasoningText.text` plus a signature. +- A response AWS event-stream whose `contentBlockDelta.delta.reasoningContent` + contains only `reasoningText.signature`. + +This isolates the adapter behavior without relying on browser rendering. 
+ +Command used: + +```sh +bun --eval ' + + diff --git a/src/spy/ui/playwright.config.ts b/src/spy/ui/playwright.config.ts new file mode 100644 index 0000000..3fd33c2 --- /dev/null +++ b/src/spy/ui/playwright.config.ts @@ -0,0 +1,31 @@ +import { defineConfig, devices } from "@playwright/test"; +import { resolve } from "node:path"; + +const port = 4674; +const uiRoot = import.meta.dirname; +const staticDir = resolve(uiRoot, "../../../dist/spy-ui"); +const testServer = resolve(uiRoot, "test-server.ts"); + +export default defineConfig({ + testDir: "./e2e", + testMatch: ["*.playwright.ts"], + timeout: 30_000, + expect: { + timeout: 10_000, + }, + use: { + ...devices["Desktop Chrome"], + baseURL: `http://127.0.0.1:${String(port)}`, + trace: "retain-on-failure", + }, + webServer: { + command: `bun run ${shellQuote(testServer)} --port ${String(port)} --static ${shellQuote(staticDir)}`, + url: `http://127.0.0.1:${String(port)}/api/health`, + reuseExistingServer: false, + timeout: 15_000, + }, +}); + +function shellQuote(value: string): string { + return `'${value.replaceAll("'", "'\\''")}'`; +} diff --git a/src/spy/ui/src/App.tsx b/src/spy/ui/src/App.tsx new file mode 100644 index 0000000..716c856 --- /dev/null +++ b/src/spy/ui/src/App.tsx @@ -0,0 +1,1989 @@ +import { useVirtualizer } from "@tanstack/react-virtual"; +import { + Activity, + AlertTriangle, + ArrowDown, + ArrowUp, + BadgeInfo, + Clock, + Database, + Filter, + Loader2, + RefreshCcw, + Search, + Server, + Trash2, + Wifi, + WifiOff, +} from "lucide-react"; +import * as React from "react"; +import { SpyApiClient, initialTimelineRangeFromLocation, parseSseEventData, replaceTimelineRangeUrl, resolveTimelineSince } from "./api.ts"; +import { Badge } from "./components/ui/badge.tsx"; +import { Button } from "./components/ui/button.tsx"; +import { Input } from "./components/ui/input.tsx"; +import { Select } from "./components/ui/select.tsx"; +import { + blockKindLabel, + blockText, + clipped, + formatBytes, + 
formatCount, + formatDateTime, + formatDuration, + formatHttpTarget, + formatNumber, + formatTime, + formatUsageTotal, + shortModelId, + statusTone, +} from "./format.ts"; +import { cn } from "./lib/utils.ts"; +import type { + DiffClassification, + HttpEventRecord, + NormalizedBlock, + RawPayloadRecord, + SpyCallDetail, + SpyCallDiff, + SpyRequestComposition, + SpyCallSummary, + SpyServiceHealth, + StreamEvent, + TimePreset, + UiFilters, + UsageRecord, +} from "./types.ts"; + +const api = new SpyApiClient(); +const CALL_LIMIT = 100; +const ALL_FILTER = "all"; +const TIMELINE_ROW_ESTIMATE = 138; +const BLOCK_SECTION_AUTO_OPEN_MAX_BLOCKS = 6; +const BLOCK_SECTION_AUTO_OPEN_MAX_BYTES = 24 * 1024; +const STREAM_EVENT_WINDOW_SIZE = 25; +const STREAM_EVENT_PAYLOAD_PREVIEW_CHARS = 4_000; +const BLOCK_KIND_OPTIONS: readonly NormalizedBlock["kind"][] = [ + "provider-envelope", + "harness-system-context", + "user-visible-message", + "prior-conversation-history", + "current-user-input", + "assistant-output", + "thinking", + "tool-definition", + "tool-call", + "tool-result", + "cache-marker", + "media-summary", + "unknown", +]; +const PROVIDER_OPTIONS = [ + { value: "bedrock", label: "Bedrock" }, +] as const; +const OPERATION_OPTIONS = [ + { value: "invoke", label: "Invoke" }, + { value: "invoke-with-response-stream", label: "Invoke Stream" }, + { value: "converse", label: "Converse" }, + { value: "converse-stream", label: "Converse Stream" }, +] as const; + +type LoadState = "idle" | "loading" | "error"; +type TimelineEmptyState = "range" | "query"; +type InspectorSectionId = + | "summary" + | "composition" + | "request-blocks" + | "response-blocks" + | "diff" + | "usage" + | "network" + | "stream" + | "raw" + | "health"; + +const INSPECTOR_SECTIONS: readonly { readonly id: InspectorSectionId; readonly label: string }[] = [ + { id: "summary", label: "Summary" }, + { id: "composition", label: "Composition" }, + { id: "request-blocks", label: "Request" }, + { id: 
"response-blocks", label: "Response" }, + { id: "diff", label: "Diff" }, + { id: "usage", label: "Usage" }, + { id: "network", label: "Network" }, + { id: "stream", label: "Stream" }, + { id: "raw", label: "Raw" }, + { id: "health", label: "Health" }, +] as const; + +interface DetailState { + readonly callId: string; + readonly detail: SpyCallDetail | null; + readonly diff: SpyCallDiff | null; + readonly state: LoadState; + readonly error?: string | undefined; +} + +type LoadedDetailState = DetailState & { + readonly detail: SpyCallDetail; + readonly diff: SpyCallDiff; +}; + +interface StreamState { + readonly callId: string; + readonly items: readonly StreamEvent[]; + readonly windowStart: number; + readonly nextCursor?: string | undefined; + readonly state: LoadState; + readonly expandedEventId?: string | undefined; + readonly error?: string | undefined; +} + +export function App(): React.ReactElement { + const initialRange = React.useMemo(() => initialTimelineRangeFromLocation(window.location), []); + const [preset, setPreset] = React.useState(initialRange.preset); + const [since, setSince] = React.useState(initialRange.since); + const [customStart, setCustomStart] = React.useState(() => datetimeLocalValue(initialRange.since)); + const [searchDraft, setSearchDraft] = React.useState(""); + const [search, setSearch] = React.useState(""); + const [filters, setFilters] = React.useState({ + provider: ALL_FILTER, + model: ALL_FILTER, + operation: ALL_FILTER, + status: ALL_FILTER, + blockKind: ALL_FILTER, + }); + const [calls, setCalls] = React.useState([]); + const [nextCursor, setNextCursor] = React.useState(); + const [callState, setCallState] = React.useState("idle"); + const [callError, setCallError] = React.useState(); + const [selectedCallId, setSelectedCallId] = React.useState(); + const [detailState, setDetailState] = React.useState(null); + const [streamState, setStreamState] = React.useState(null); + const [inspectorResetKey, setInspectorResetKey] = 
React.useState(0); + const [health, setHealth] = React.useState(null); + const [sseConnected, setSseConnected] = React.useState(false); + const [sseError, setSseError] = React.useState(); + const [clearOpen, setClearOpen] = React.useState(false); + const [clearing, setClearing] = React.useState(false); + const clearTriggerRef = React.useRef(null); + const timelineEmptyState = timelineEmptyStateFor(search, filters); + const timelineContextKey = React.useMemo(() => [ + since, + search, + filters.provider, + filters.model, + filters.operation, + filters.status, + ].join("|"), [filters.model, filters.operation, filters.provider, filters.status, search, since]); + const previousTimelineContextKey = React.useRef(null); + const previousSelectedCallId = React.useRef(undefined); + + const loadCalls = React.useCallback(async (options: { readonly cursor?: string | undefined; readonly append?: boolean | undefined } = {}) => { + setCallState("loading"); + setCallError(undefined); + try { + const page = await api.calls({ + since: sinceForCallLoad(options.append === true), + search, + limit: CALL_LIMIT, + provider: filterQueryValue(filters.provider), + modelId: filterQueryValue(filters.model), + operation: filterQueryValue(filters.operation), + status: filterQueryValue(filters.status), + ...(options.cursor === undefined ? {} : { cursor: options.cursor }), + }); + setCalls((current) => options.append === true ? [...current, ...page.items] : page.items); + setNextCursor(page.nextCursor); + setCallState("idle"); + setSelectedCallId((current) => { + if (options.append === true) { + return current ?? page.items[0]?.call.id; + } + if (current !== undefined && page.items.some((item) => item.call.id === current)) { + return current; + } + return page.items[0]?.call.id; + }); + } catch (error) { + setCallState("error"); + setCallError(error instanceof Error ? 
error.message : "failed to load calls"); + } + }, [filters.model, filters.operation, filters.provider, filters.status, preset, search, since]); + + React.useEffect(() => { + void loadCalls(); + }, [loadCalls]); + + React.useEffect(() => { + if (previousTimelineContextKey.current !== null && previousTimelineContextKey.current !== timelineContextKey) { + setStreamState(null); + resetInspectorScroll(); + } + previousTimelineContextKey.current = timelineContextKey; + }, [timelineContextKey]); + + React.useEffect(() => { + if (previousSelectedCallId.current !== selectedCallId) { + setStreamState(null); + resetInspectorScroll(); + previousSelectedCallId.current = selectedCallId; + } + }, [selectedCallId]); + + React.useEffect(() => { + let cancelled = false; + void api.health().then((snapshot) => { + if (!cancelled) { + setHealth(snapshot); + } + }).catch(() => { + if (!cancelled) { + setHealth(null); + } + }); + return () => { + cancelled = true; + }; + }, []); + + React.useEffect(() => { + const source = new EventSource("/api/events"); + const onOpen = (): void => { + setSseConnected(true); + }; + const onError = (): void => { + setSseConnected(false); + }; + const onHello = (event: MessageEvent): void => { + try { + parseSseEventData("hello", event.data); + setSseConnected(true); + setSseError(undefined); + } catch (error) { + setSseConnected(false); + setSseError(sseErrorMessage(error)); + } + }; + const onHealth = (event: MessageEvent): void => { + try { + setHealth(parseSseEventData("health", event.data)); + setSseConnected(true); + setSseError(undefined); + } catch (error) { + setSseConnected(false); + setSseError(sseErrorMessage(error)); + } + }; + const onCallsChanged = (event: MessageEvent): void => { + try { + parseSseEventData("calls-changed", event.data); + setSseError(undefined); + void loadCalls(); + } catch (error) { + setSseConnected(false); + setSseError(sseErrorMessage(error)); + } + }; + const onCleared = (event: MessageEvent): void => { + try { + 
parseSseEventData("cleared", event.data); + setSseError(undefined); + } catch (error) { + setSseConnected(false); + setSseError(sseErrorMessage(error)); + return; + } + setCalls([]); + setNextCursor(undefined); + setSelectedCallId(undefined); + setDetailState(null); + setStreamState(null); + void api.health().then(setHealth); + }; + + source.addEventListener("open", onOpen); + source.addEventListener("error", onError); + source.addEventListener("hello", onHello as EventListener); + source.addEventListener("health", onHealth as EventListener); + source.addEventListener("calls-changed", onCallsChanged as EventListener); + source.addEventListener("cleared", onCleared as EventListener); + + return () => { + source.removeEventListener("open", onOpen); + source.removeEventListener("error", onError); + source.removeEventListener("hello", onHello as EventListener); + source.removeEventListener("health", onHealth as EventListener); + source.removeEventListener("calls-changed", onCallsChanged as EventListener); + source.removeEventListener("cleared", onCleared as EventListener); + source.close(); + }; + }, [loadCalls]); + + const selectedSummary = React.useMemo(() => { + return calls.find((summary) => summary.call.id === selectedCallId) ?? null; + }, [calls, selectedCallId]); + const latestVisibleSummary = calls[0] ?? null; + const selectedCallIsPinned = selectedSummary !== null + && latestVisibleSummary !== null + && selectedSummary.call.id !== latestVisibleSummary.call.id; + + const selectedDetailVersion = React.useMemo(() => { + if (selectedSummary === null) { + return selectedCallId; + } + return [ + selectedSummary.call.id, + selectedSummary.call.status, + selectedSummary.call.completed_at ?? "pending", + selectedSummary.call.request_content_hash ?? "", + selectedSummary.call.response_content_hash ?? "", + selectedSummary.durationMs ?? "pending", + selectedSummary.usage.totalTokens ?? 
"usage-pending", + selectedSummary.responseBlockCount, + selectedSummary.responseByteSize, + selectedSummary.streamEventCount, + ].join("|"); + }, [selectedCallId, selectedSummary]); + + React.useEffect(() => { + if (selectedCallId === undefined) { + return; + } + let cancelled = false; + setDetailState({ callId: selectedCallId, detail: null, diff: null, state: "loading" }); + setStreamState(null); + void Promise.all([ + api.callDetail(selectedCallId), + api.callDiff(selectedCallId), + ]).then(([detail, diff]) => { + if (!cancelled) { + setDetailState({ callId: selectedCallId, detail, diff, state: "idle" }); + } + }).catch((error: unknown) => { + if (!cancelled) { + setDetailState({ + callId: selectedCallId, + detail: null, + diff: null, + state: "error", + error: error instanceof Error ? error.message : "failed to load call detail", + }); + } + }); + return () => { + cancelled = true; + }; + }, [selectedCallId, selectedDetailVersion]); + + const modelOptions = React.useMemo(() => { + return [ + ...new Set([ + ...calls.map((summary) => summary.call.model_id), + ...(filters.model === ALL_FILTER ? 
[] : [filters.model]), + ]), + ].sort(); + }, [calls, filters.model]); + + function setPresetSince(nextPreset: TimePreset): void { + if (nextPreset === "custom") { + applyCustomStart(); + return; + } + setTimelineRange(nextPreset, resolveTimelineSince(nextPreset, since)); + } + + function applyCustomStart(): void { + const next = secondsFromCustomStart(customStart, since); + if (next !== null) { + setTimelineRange("custom", next); + } + } + + function setTimelineRange(nextPreset: TimePreset, nextSince: number): void { + setPreset(nextPreset); + setSince(nextSince); + setCustomStart(datetimeLocalValue(nextSince)); + replaceTimelineRangeUrl(nextPreset, nextSince); + } + + function sinceForCallLoad(append: boolean): number { + if (append || !isRollingPreset(preset)) { + return since; + } + const nextSince = resolveTimelineSince(preset, since); + if (nextSince !== since) { + setSince(nextSince); + setCustomStart(datetimeLocalValue(nextSince)); + replaceTimelineRangeUrl(preset, nextSince); + } + return nextSince; + } + + function submitSearch(event: React.SyntheticEvent): void { + event.preventDefault(); + setSearch(searchDraft); + } + + async function loadMore(): Promise { + if (nextCursor !== undefined) { + await loadCalls({ cursor: nextCursor, append: true }); + } + } + + async function loadStreamEvents(more = false): Promise { + if (selectedCallId === undefined) { + return; + } + const cursor = more ? streamState?.nextCursor : undefined; + setStreamState((current) => ({ + callId: selectedCallId, + items: more ? current?.items ?? [] : [], + windowStart: more ? current?.windowStart ?? 0 : 0, + state: "loading", + ...(cursor === undefined ? {} : { nextCursor: cursor }), + })); + try { + const page = await api.streamEvents(selectedCallId, cursor); + setStreamState((current) => { + const currentItems = more ? current?.items ?? [] : []; + const items = more ? [...currentItems, ...page.items] : page.items; + return { + callId: selectedCallId, + items, + windowStart: more ? 
streamWindowStartForIndex(currentItems.length, items.length) : 0, + nextCursor: page.nextCursor, + state: "idle", + }; + }); + } catch (error) { + setStreamState({ + callId: selectedCallId, + items: more ? streamState?.items ?? [] : [], + windowStart: more ? streamState?.windowStart ?? 0 : 0, + state: "error", + error: error instanceof Error ? error.message : "failed to load stream events", + }); + } + } + + function setStreamWindowStart(windowStart: number): void { + setStreamState((current) => { + if (current === null) { + return current; + } + return { + ...current, + expandedEventId: undefined, + windowStart: streamWindowStartForIndex(windowStart, current.items.length), + }; + }); + } + + function toggleStreamPayload(eventId: string): void { + setStreamState((current) => { + if (current === null) { + return current; + } + return { + ...current, + expandedEventId: current.expandedEventId === eventId ? undefined : eventId, + }; + }); + } + + function selectTimelineCall(callId: string): void { + if (callId === selectedCallId) { + resetSelectedCallInspection(); + return; + } + setSelectedCallId(callId); + } + + function resetSelectedCallInspection(): void { + setStreamState(null); + setInspectorResetKey((key) => key + 1); + resetInspectorScroll(); + } + + function closeClearDialog(): void { + setClearOpen(false); + window.requestAnimationFrame(() => { + clearTriggerRef.current?.focus(); + }); + } + + async function clearData(): Promise { + setClearing(true); + try { + await api.clearData(); + closeClearDialog(); + } finally { + setClearing(false); + } + } + + return ( + <> +
+
+
+
+
+
+

Rootcell Spy

+

+ {preset === "live" ? "Live from now" : `Since ${formatDateTime(since)}`} +

+
+
+ +
+ + {sseConnected ? + raw {health?.service.storeRaw === true ? "on" : "off"} + + +
+
+ +
+
+ + {callError === undefined ? null : ( +
+
+ )} + {sseError === undefined ? null : ( +
+
+ )} + { + void loadMore(); + }} + /> +
+ + { + if (latestVisibleSummary !== null) { + setSelectedCallId(latestVisibleSummary.call.id); + } + }} + onLoadStream={() => { + void loadStreamEvents(false); + }} + onLoadMoreStream={() => { + void loadStreamEvents(true); + }} + onStreamWindowStart={setStreamWindowStart} + onToggleStreamPayload={toggleStreamPayload} + /> +
+
+ + {clearOpen ? ( + { + void clearData(); + }} + /> + ) : null} + + ); +} + +function TimelineControls(props: { + readonly preset: TimePreset; + readonly customStart: string; + readonly searchDraft: string; + readonly filters: UiFilters; + readonly modelOptions: readonly string[]; + readonly callState: LoadState; + readonly onPreset: (preset: TimePreset) => void; + readonly onCustomStart: (value: string) => void; + readonly onApplyCustomStart: () => void; + readonly onSearchDraft: (value: string) => void; + readonly onSubmitSearch: (event: React.SyntheticEvent) => void; + readonly onFilters: (filters: UiFilters) => void; +}): React.ReactElement { + const { filters } = props; + return ( +
+
+ { + props.onPreset("live"); + }}>Live + { + props.onPreset("10m"); + }}>10 min + { + props.onPreset("1h"); + }}>1 hour + { + props.onPreset("today"); + }}>Today + { + props.onPreset("custom"); + }}>Custom +
+
+
+ +
+
+
+
+ ); +} + +function SegmentButton(props: { + readonly active: boolean; + readonly children: React.ReactNode; + readonly onClick: () => void; +}): React.ReactElement { + return ( + + ); +} + +function Timeline(props: { + readonly calls: readonly SpyCallSummary[]; + readonly selectedCallId: string | undefined; + readonly loading: boolean; + readonly hasMore: boolean; + readonly emptyState: TimelineEmptyState; + readonly onSelect: (callId: string) => void; + readonly onLoadMore: () => void; +}): React.ReactElement { + const parentRef = React.useRef(null); + const virtualizer = useVirtualizer({ + count: props.calls.length, + getScrollElement: () => parentRef.current, + estimateSize: () => TIMELINE_ROW_ESTIMATE, + overscan: 8, + }); + const virtualItems = virtualizer.getVirtualItems(); + + if (props.calls.length === 0 && !props.loading) { + const message = props.emptyState === "query" + ? "No provider calls match the current search or filters." + : "No provider calls in this range."; + return ( +
+ {message} +
+ ); + } + + return ( +
+
+
+ {virtualItems.map((virtualRow) => { + const summary = props.calls[virtualRow.index]; + if (summary === undefined) { + return null; + } + return ( +
+ { + props.onSelect(summary.call.id); + }} + /> +
+ ); + })} +
+
+
+
+ {formatCount(props.calls.length, "call")} + +
+
+
+ ); +} + +function TimelineRow(props: { + readonly summary: SpyCallSummary; + readonly selected: boolean; + readonly onSelect: () => void; +}): React.ReactElement { + const { summary } = props; + return ( + + ); +} + +function timelineRowAccessibleName(summary: SpyCallSummary): string { + return [ + `Open call ${summary.call.id}`, + `model ${shortModelId(summary.call.model_id)}`, + `status ${summary.call.status}`, + `started ${formatTime(summary.call.started_at)}`, + `operation ${summary.call.operation}`, + ...timelineUsageMetricData(summary.usage).map((metric) => `${metric.label} ${metric.value}`), + `input ${formatBytes(summary.requestByteSize)}`, + `output ${formatBytes(summary.responseByteSize)}`, + `duration ${formatDuration(summary.durationMs)}`, + `${formatNumber(summary.requestBlockCount)} request blocks`, + `${formatNumber(summary.responseBlockCount)} response blocks`, + ].join(", "); +} + +function TimelineUsageMetrics(props: { readonly usage: SpyCallSummary["usage"] }): React.ReactElement { + const metrics = timelineUsageMetricData(props.usage); + + const label = metrics.map((metric) => `${metric.label} ${metric.value}`).join(", "); + + return ( + + {metrics.map((metric) => ( + + ))} + + ); +} + +interface UsageMetricProps { + readonly label: string; + readonly value: string; +} + +function UsageMetric(props: UsageMetricProps): React.ReactElement { + return ( + + + {props.value} + + ); +} + +function timelineUsageMetricData(usage: SpyCallSummary["usage"]): UsageMetricProps[] { + const metrics: UsageMetricProps[] = [ + { label: "read", value: formatNumber(usage.inputTokens) }, + { label: "write", value: formatNumber(usage.outputTokens) }, + ]; + + if (usage.cacheReadTokens !== null || usage.cacheWriteTokens !== null) { + metrics.push( + { label: "cache read", value: formatNumber(usage.cacheReadTokens) }, + { label: "cache write", value: formatNumber(usage.cacheWriteTokens) }, + ); + } + + return metrics; +} + +function usageMetricMarker(label: string): 
React.ReactNode { + switch (label) { + case "read": + return