From 8574178395655542ba722bac9ece5e4225e249a3 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Wed, 20 May 2026 13:17:34 +0000 Subject: [PATCH] fix: resume sessions across client restarts instead of orphaning them When an AI agent (OpenCode, etc.) restarts, it regenerates its session header value (e.g. x-session-affinity nanoid). Previously, the gateway's Tier 1 identification treated this as a brand-new session, orphaning all prior distillations, gradient calibration, and cache warming state. Four coordinated fixes: 1. OpenCode plugin injects stable x-lore-session-id using OpenCode's persistent DB session ID (survives restarts). 2. Reorder KNOWN_SESSION_HEADERS so x-lore-session-id (always stable) is checked before volatile headers like x-session-affinity. 3. Add Tier 1a cross-header migration: when a new higher-priority header appears (plugin upgrade), check lower-priority headers for an existing session and re-index under the new header. 4. Add Tier 1b rotation detection in the gateway: when a known header name is present but its value is unrecognized, scan for exactly one recent non-subagent predecessor session with the same header name. If found, resume it instead of creating a new session. Also fixes: - Remove sessions.has() gate in Tier 1 so DB-only sessions (after gateway restart) are found without requiring in-memory hydration. - Treat lastTurnAt=0 as 'recently active' (gradient never ran yet) instead of infinitely stale in rotation candidate evaluation. 
--- .lore.md | 18 +- packages/gateway/src/pipeline.ts | 112 ++++++++++- packages/gateway/src/session.ts | 68 ++++++- packages/gateway/test/session.test.ts | 273 ++++++++++++++++++++++++++ packages/opencode/src/index.ts | 3 + 5 files changed, 460 insertions(+), 14 deletions(-) diff --git a/.lore.md b/.lore.md index ca366cd1..ae3a4e7b 100644 --- a/.lore.md +++ b/.lore.md @@ -5,7 +5,7 @@ ### Architecture -* **3-layer gradient model: layer 2 is transient, falls back to layer 1 via urgent distillation**: 3-layer gradient model: Layer 0 (all raw messages + LTM, append-only cache). Layer 1 (distilled prefix + pinned raw window, bust once on entry then warm). Layer 2/emergency (transient hard reset: fresh LTM, 2-3 best distillations, current agentic turn — fires 1-2 turns then urgent distillation falls back to Layer 1). Layer 2 must NOT set stickiness — stickiness only applies to layers 1-3. Bug in \`gradient.ts\`: \`effectiveMinLayer = max(0, lastLayer)\` traps sessions in emergency indefinitely; fix: restrict stickiness to \`lastLayer >= 1 && lastLayer <= 3\`. Context budget caps (160K at Opus) are cost-driven. Layer-specific distLimit: layers 1-2 all non-archived distillations; layer 3: top 5 via \`selectDistillations()\`; emergency: top 2. Scoring: 70% recency + 30% \`importanceBonus()\`. Cache frozen during tool-call chains for byte-identical prefix. +* **3-layer gradient model: layer 2 is transient, falls back to layer 1 via urgent distillation**: 3-layer gradient model in \`packages/core/src/gradient.ts\`. Layer 0: full passthrough. Layer 1 (\`strip:none\`): distilled prefix + stable raw window (byte-identical for cache). Layer 2 (\`strip:old-tools\`, rawFrac=0.50): strips tool outputs on messages older than last 2 turns. Layer 3 (\`strip:all-tools\`, rawFrac=0.55, distFrac=0.15, distLimit=5): strips all tool outputs except current turn. Layer 4 (emergency): top 2 distillations + 25% tail; never strips tool parts (would cause infinite tool-call loops). 
\`currentTurnStart()\` (line 2053) walks backward past tool-call chains to protect the entire active chain. \`tryFit()\` backward walk has no pair-keeping logic — safety via reconstruct-after-eviction \[\[see tool pairing entry]]. Bug: \`effectiveMinLayer = max(0, lastLayer)\` traps sessions in emergency; fix: restrict stickiness to \`lastLayer >= 1 && lastLayer <= 3\`. * **Background LLM rate limiting: p-limit(2) + 429 circuit breaker in background-limiter.ts**: Global concurrency limit for background LLM work in \`packages/gateway/src/background-limiter.ts\`. Uses \`p-limit(2)\` to cap simultaneous background LLM calls across all idle sessions. Circuit breaker trips on 429 responses and pauses all background work for the \`Retry-After\` duration. Wired into: idle scheduler, pipeline incremental distillation, in-flight curation. Urgent distillation is excluded (client is waiting). Without this, N idle sessions fire N×4 simultaneous background calls causing cascading rate limit failures. @@ -13,12 +13,18 @@ * **Conversation import system: providers, detection, extraction pipeline**: Core import system lives in \`packages/core/src/import/\`. Key design: \`AgentHistoryProvider\` interface with \`detect()\`/\`load()\` methods; providers registered in a global registry (\`providers/index.ts\`). Detection scans all providers, returns \`DetectedSession\[]\`. Extraction calls curator LLM sequentially per chunk, deduplicating ops via \`parseOps()\`/\`applyOps()\`. Idempotency via \`import\_history\` table (DB migration v19). Built-in providers: Claude Code (\`~/.claude/projects/\`), OpenCode (SQLite), Aider (markdown), Codex (\`~/.codex/sessions/\` JSONL), Cline (VS Code globalStorage JSON), Continue (\`~/.continue/sessions/\` JSON), Pi (\`~/.pi/agent/sessions/\` tree-structured JSONL). Auto-import triggered in \`lore run\` via \`maybeAutoImport()\`. Copilot Chat skipped (opaque leveldb). 
The OpenCode plugin's \`reflect.ts\` (plugin-side recall tool) was dead code and has been removed — plugin uses \`tool: {}\` (empty), gateway handles all recall. + +* **Gradient tool\_use/tool\_result pairing: reconstruct-after-eviction pattern**: Gradient eviction (\`tryFit()\`) has NO logic to keep \`tool\_use\`/\`tool\_result\` pairs together — it cuts at individual message boundaries. Safety is achieved via reconstruct-after-eviction: (1) \`resolveToolResults()\` (temporal-adapter.ts:239) merges tool result data onto assistant tool parts and strips user-side \`tool\_result\` parts before gradient runs; (2) \`loreMessagesToGateway()\` (pipeline.ts:3401) reconstructs \`tool\_use\`+\`tool\_result\` pairs from surviving assistant tool parts; (3) \`removeOrphanedToolResults()\` (pipeline.ts:3524) removes any remaining orphans as a safety net. \`sanitizeToolParts()\` (gradient.ts:1071) converts pending/running tool parts to error state to prevent API rejection. Layer 4 (emergency) never strips tool parts to avoid infinite tool-call loops. + * **LTM confidence field: semantic meaning and rerankPreferences() for legacy entries**: \`ltm.create()\` accepts optional \`confidence\` param (default 1.0, clamped \[0,1]). Confidence semantics: 1.0=unconditional directive, 0.9=strong preference, 0.8=moderate, 0.6=mild. \`CuratorOp\` create type includes \`confidence\`, wired through \`applyOps\`. \`rerankPreferences()\` in \`packages/core/src/ltm.ts\` re-scores legacy entries by directive keyword patterns (\`STRONG\_DIRECTIVE\_RE\` regex); skips entries whose \`confidence\` was already set to a non-default (custom) value — manual overrides are preserved. \`lore data rerank\` CLI command triggers re-ranking; also auto-runs after \`lore data recover\`. Run after deploying to fix existing preferences in DB. 
* **OpenAI streaming translation: stateful SSE translators in stream/openai.ts**: OpenAI streaming translation: stateful SSE translators in \`packages/gateway/src/stream/openai.ts\` and \`stream/openai-responses.ts\` consume Anthropic SSE events and emit OpenAI-format SSE events incrementally — clients see tokens as they arrive. The pipeline carries \`effectiveProtocol\` in \`UpstreamResult\` to dispatch to the right translator. All translators must implement: (1) \`cancelled\` flag + \`cancel()\` handler aborting upstream via \`AbortController\`; (2) \`safeEnqueue()\` wrapper that no-ops if \`cancelled\`; (3) error \`catch\` block emitting a protocol-appropriate terminal event (\`response.failed\` for Responses API, \`\[DONE]\` with error for Chat Completions) — otherwise clients hang. Adding a new upstream protocol requires both an accumulator branch and a streaming translator. + +* **OpenCode x-session-affinity is a per-process nanoid — not stable across restarts**: OpenCode generates \`x-session-affinity\` natively in its core binary (not via plugin API) as a nanoid — random, per-process. It does NOT persist across OpenCode restarts. The Lore plugin (\`packages/opencode/src/index.ts\`) never touches this header. \`input.sessionID\` in plugin hooks (e.g. \`chat.headers\`) is OpenCode's persistent DB \`Session.id\` — stable across restarts. These are different values. When OpenCode restarts, the new nanoid causes Tier 1 in \`identifySession()\` to create a brand-new Lore session, orphaning all prior distillations/gradient state. Fix: inject \`input.sessionID\` as \`x-lore-session-id\` in the \`chat.headers\` hook to give Lore a restart-stable identifier. + * **Pi plugin: which providers can be proxied through the gateway**: Pi plugin gateway proxy compatibility by wire protocol. 
\*\*Proxiable\*\*: \`anthropic\` → \`/v1/messages\`: \`anthropic\`, \`fireworks\`, \`github-copilot\`; \`openai-completions\` → \`/v1/chat/completions\`: \`deepseek\`, \`xai\`, \`groq\`, \`cerebras\`, \`openrouter\`, \`huggingface\`, \`opencode\`, \`opencode-go\`; \`openai-responses\` → \`/v1/responses\`: \`openai\`, \`azure-openai-responses\`, \`openai-codex\`, \`azure-openai\`, \`lm-studio\`, \`ollama\`. \*\*Cannot proxy\*\*: \`google\`, \`google-vertex\`, \`amazon-bedrock\`, \`mistral\`. \`registerProvider(name, { baseUrl })\` overrides base URL. Gateway routes by URL path only. OpenAI streaming clients receive true incremental SSE (\`stream/openai.ts\`). @@ -51,6 +57,9 @@ * **splitSegments() infinite recursion on oversized single messages**: splitSegments() infinite recursion on oversized single messages: In \`packages/core/src/distillation.ts\`, \`splitSegments()\` recurses infinitely when a single message exceeds \`maxSegmentTokens\` (16384). \`findSplitIndex()\` returns \`messages.length\` (=1), so \`left = messages.slice(0, 1)\` produces an identical recursive call. Triggered on large tool outputs (~49KB+). Fix: add base case after the \`totalTokens <= maxTokens\` guard — \`if (messages.length <= 1) return \[messages]\`. The oversized message becomes an indivisible segment. + +* **Tier 1 session identification blocks Tier 3 fingerprinting when known header changes**: Trap: When \`x-session-affinity\` changes (OpenCode restart), Tier 3 fingerprint matching looks like it should reconnect the session. Fix: Tier 1 in \`identifySession()\` (\`pipeline.ts\` ~line 928) is a first-match-wins gate — if ANY \`KNOWN\_SESSION\_HEADERS\` header is present but unrecognized in \`headerSessionIndex\`, it immediately creates a new session and returns \`{ isNew: true, tier: 1 }\`. Tier 3 is only reached when NO known session header is present at all. Result: OpenCode restart always produces a session split with no recovery path through fingerprinting. 
The primary fix is injecting a stable identifier (e.g. \`input.sessionID\` as \`x-lore-session-id\`) so Tier 1 matches correctly; when only a volatile header is available, Tier 1b rotation detection (\`findRotationPredecessor()\` in \`session.ts\`) resumes the single recent non-subagent predecessor instead. + * **TTL downgrade hysteresis: downgradeStreak field prevents compounding cache busts**: Auto-TTL downgrade hysteresis in \`packages/gateway/src/pipeline.ts\`: downgrade from 1h→5m TTL requires 3 consecutive short-gap turns (\`ttlDowngradeStreak\` in \`SessionState\`). Block downgrade if >50% of session tokens are cached. Reset streak on any long-gap turn. Subagent turns and tool-use continuations excluded from gap recording — capture \`prevStopReason\` before line 1667 overwrites it, skip when \`prevStopReason === 'tool\_use'\` or \`isSubagentTurn\`. State persistence: immediate (session identity), per-turn (cost snapshot), 30s periodic (gradient EMAs + cache warming via dirty flag). Max data loss on crash: ~30s. @@ -73,8 +82,8 @@ * **Always ask for session history recall at the start of new sessions**: Session continuity: At session start, recall specific prior-session details (PR titles, branch names, test counts, file names, commit details, lint fixes) as precisely as possible. State explicitly when details were not captured. Proactively log granular session artifacts (exact error messages, PR titles, file paths, line numbers, commit hashes, test names, root causes) so future recall is reliable. User relies on this to resume context quickly. When information is unavailable, say so explicitly and suggest where to find it (GitHub, logs, commit history). Never fabricate or infer details as confirmed facts — always distinguish verified session records from generated/uncertain information. - -* **Always carry forward workflow directives and incident context as persistent hard rules across sessions**: 🔴 HARD RULES (every session): 1. Two-phase Plan/Build: write plan file first, wait for approval, then implement. 2. Pre-merge self-review: rate all changed files by severity with exact paths/line numbers; fix all criticals; confirm CI green. 3.
Eval fairness: demand deep fairness analysis before accepting eval results. 4. LTM cache delete must be inside 'changes made' guard — unconditional placement is a bug. 5. Re-state hard rules + completed work at session start with exact file paths, line numbers, commit hashes, PR URLs — flag missing details with 🟡. 6. Root cause first: RCA must precede any proposed solution. 7. Never run \`git remote -v\` with client-controlled cwd. 8. IDs starting with \`LOREAI-GATEWAY-\` are Sentry issue IDs. 9. Pull from origin/main before any codebase exploration or edits. 10. Resolve \`.lore.md\` rebase conflicts with \`--ours\`. 11. When multiple fixes identified, apply together and immediately re-run evals. 12. Before implementing features/debugging, explore relevant codebase area first. 13. Production incidents: acknowledge → investigate → hotfix → code fix → preventive measures. 14. 'Let's go with that plan.' = full approval, proceed immediately. 15. Pre-c \[truncated — entry too long] + +* **Always create a new branch from main before starting follow-up fixes after a PR merges**: After a PR is merged to main, the user consistently pulls the latest main and creates a new branch before beginning any follow-up work or fixes. This applies even when the follow-up is discovered immediately after the merge. The assistant should proactively suggest or execute: (1) pull/rebase from origin main, (2) create a new branch for the next set of changes, before making any edits. Never continue working on the old branch or commit follow-up fixes directly to main. * **Always fix cache memory leaks with TTL eviction, size cap, and scheduled pruning**: Cache memory leak fix pattern: (1) TTL check in \`.get()\` — delete and return undefined if \`Date.now() - entry.ts > this.ttlMs\`; (2) LRU eviction in \`.set()\` — delete oldest key when \`store.size >= maxEntries\`; (3) \`setInterval(() => this.prune(), 60\_000)\` in constructor. 
Defaults: \`maxEntries = 10\_000\`, \`ttlMs = 300\_000\` (5 min). Applies to all cache modules. @@ -91,6 +100,9 @@ * **Always request critical code reviews with specific file paths, line numbers, and severity classifications**: Code review standard: provide exact file paths, line ranges, severity classifications (critical/medium/low), root causes, and concrete fix recommendations. Must-fix items called out explicitly before merge. Before merging any PR: (1) run critical self-review covering all changed files; (2) fix all criticals; (3) confirm CI green. Reviews must be skeptical — actively look for subtle bugs (state not cleared on fallback paths, consume-once flag semantics, circuit breaker bypass, concurrency edge cases). Produce explicit verdict alongside ranked findings. Before implementing features or debugging, read all named files deeply and report findings with precise references. Always analyze root causes before proposing solutions. When starting eval-related work, enumerate concrete gaps before proposing solutions. Track which evals have been run vs. pending. After root-cause analysis or bug fix, propose eval extensions covering the newly discovered failure mode. When presented with a GitHub issue, challenge unsubstantiated claims — verify against actual code. + +* **Always request exact file paths, line numbers, and verbatim code snippets when investigating codebase behavior**: When asking for code investigation or analysis, the user consistently expects responses to include exact file paths (e.g., \`packages/gateway/src/session.ts\`), specific line numbers (e.g., 'lines 84-93'), and verbatim code snippets or field names. The user structures requests as multi-part investigations with numbered questions. Responses should never paraphrase or summarize code behavior without grounding it in precise source locations. When a finding is negative (e.g., 'zero references'), that should also be stated with the specific file and search scope examined. 
+ * **Always request worker tests with a consistent 7-case spec covering compute, missing-record, cleanup retention, and sync scenarios**: Worker test files follow a consistent 7-case spec: (1) compute job — DB lookup + update, (2) missing record — skip without throw, (3) cleanup — hard-delete records archived >30 days, (4) cleanup — preserve recently archived records, (5) sync — process a batch, (6) sync — skip missing records, (7) sync — respect dryRun flag. Tests mock DB and Redis. Applies uniformly across all worker modules. diff --git a/packages/gateway/src/pipeline.ts b/packages/gateway/src/pipeline.ts index be8e0c9e..e5058def 100644 --- a/packages/gateway/src/pipeline.ts +++ b/packages/gateway/src/pipeline.ts @@ -70,8 +70,10 @@ import { generateSessionID, fingerprintMessages, MESSAGE_COUNT_PROXIMITY_THRESHOLD, + KNOWN_SESSION_HEADERS, extractKnownSessionHeader, learnHeaders, + findRotationPredecessor, } from "./session"; import { isCompactionRequest, @@ -903,15 +905,22 @@ function getOrCreateSession( /** * Identify or create a session from the incoming request. * - * Uses a 3-tier strategy: - * 1. **Known headers** — `x-claude-code-session-id`, `x-session-affinity`, - * `x-lore-session-id`. Immediate match, survives compaction & model changes. + * Uses a multi-tier strategy: + * 1. **Known headers** — `x-lore-session-id` (stable, checked first), + * `x-claude-code-session-id`, `x-session-affinity`. + * Immediate match, survives compaction & model changes. + * 1a. **Cross-header migration** — when the primary known header is new + * (e.g. plugin upgrade), checks lower-priority headers for an existing + * session and re-indexes under the new header. + * 1b. **Header value rotation** — when a known header name is present but + * its value changed (client restart), finds the predecessor session and + * resumes it instead of creating a new one. * 2. **Learned headers** — `x-` headers discovered via fingerprint-bootstrapped * learning. 
Promoted after 3 stable turns + cross-session uniqueness. * 3. **Fingerprint fallback** — SHA-256 of first user message + auth suffix * (no model). Message-count proximity for fork disambiguation. * - * Priority: Tier 1 > Tier 2 > Tier 3. + * Priority: Tier 1 > 1a > 1b > Tier 2 > Tier 3. */ async function identifySession( req: GatewayRequest, @@ -929,11 +938,102 @@ async function identifySession( if (known) { const indexKey = `${known.headerName}:${known.sessionId}`; const existingSid = headerSessionIndex.get(indexKey); - if (existingSid && sessions.has(existingSid)) { + if (existingSid) { + // Session may only exist in DB (after gateway restart) — that's fine, + // getOrCreateSession() will hydrate it from the session_state table. return { sessionID: existingSid, isNew: false, tier: 1 }; } - // New session with a known header — create and index it. + // --- Tier 1a: Cross-header migration --- + // The primary known header is new (e.g. plugin upgrade started sending + // x-lore-session-id), but the request also contains a lower-priority + // known header that IS already indexed (e.g. x-session-affinity from + // before the upgrade). Re-index under the new header and resume. + for (const fallbackName of KNOWN_SESSION_HEADERS) { + if (fallbackName === known.headerName) continue; // skip the primary + const fallbackValue = headers[fallbackName]; + if (!fallbackValue) continue; + const fallbackKey = `${fallbackName}:${fallbackValue}`; + const fallbackSid = headerSessionIndex.get(fallbackKey); + if (fallbackSid) { + // Migrate: index under the new (higher-priority) header. + headerSessionIndex.set(indexKey, fallbackSid); + saveSessionTracking(fallbackSid, { + headerSessionId: known.sessionId, + headerName: known.headerName, + }); + // Update in-memory state if present. 
+ const inMemory = sessions.get(fallbackSid); + if (inMemory) { + inMemory.headerSessionId = known.sessionId; + inMemory.headerName = known.headerName; + } + log.info( + `session ${fallbackSid.slice(0, 16)}: migrated from ${fallbackName} to ${known.headerName}`, + ); + return { sessionID: fallbackSid, isNew: false, tier: 1 }; + } + } + + // --- Tier 1b: Header value rotation detection --- + // The header name is known but the value is new (e.g. OpenCode restarted + // and regenerated its nanoid). Before creating a new session, check if + // exactly one existing session was previously identified via the SAME + // header name. If so, this is a client restart — resume the old session + // and re-index the new header value. + const predecessor = findRotationPredecessor( + known.headerName, + known.sessionId, + headerSessionIndex, + (sid) => { + // Session may be in memory or only in DB (after gateway restart). + const inMemory = sessions.get(sid); + if (inMemory) { + return { + sid, + isSubagent: !!inMemory.isSubagent, + lastActiveAt: inMemory.lastRequestTime, + }; + } + // Lightweight DB check for recency and subagent status. + const persisted = loadSessionTracking(sid); + if (!persisted) return null; // orphaned index entry + return { + sid, + isSubagent: persisted.isSubagent, + // lastTurnAt=0 means gradient never ran yet — session is new, + // treat as recently active (not infinitely stale). + lastActiveAt: persisted.lastTurnAt > 0 ? persisted.lastTurnAt : Date.now(), + }; + }, + ); + + if (predecessor) { + // Resume the old session with the new header value. + const oldKey = `${known.headerName}:${predecessor.oldHeaderValue}`; + headerSessionIndex.delete(oldKey); + headerSessionIndex.set(indexKey, predecessor.sid); + + // Update in-memory state if present. + const inMemory = sessions.get(predecessor.sid); + if (inMemory) { + inMemory.headerSessionId = known.sessionId; + inMemory.headerName = known.headerName; + } + + // Persist the new header mapping immediately. 
+ saveSessionTracking(predecessor.sid, { + headerSessionId: known.sessionId, + headerName: known.headerName, + }); + + log.info( + `session ${predecessor.sid.slice(0, 16)}: resumed via ${known.headerName} value rotation`, + ); + return { sessionID: predecessor.sid, isNew: false, tier: 1 }; + } + + // Genuinely new session — no predecessor or ambiguous concurrent sessions. const sessionID = generateSessionID(); headerSessionIndex.set(indexKey, sessionID); return { sessionID, isNew: true, tier: 1 }; diff --git a/packages/gateway/src/session.ts b/packages/gateway/src/session.ts index 8aa34e83..b8d04904 100644 --- a/packages/gateway/src/session.ts +++ b/packages/gateway/src/session.ts @@ -4,10 +4,10 @@ * Uses a 3-tier identification strategy: * * **Tier 1 — Known headers** (immediate match): + * `x-lore-session-id` (Lore plugins: OpenCode, Pi — stable, deterministic), * `x-claude-code-session-id` (Claude Code), `x-session-affinity` - * (OpenCode), `x-lore-session-id` (Pi plugin). These persist for the - * entire client session and survive model changes, compaction, and - * context rewriting. + * (OpenCode native — volatile, regenerated on restart). Checked in + * priority order; stable headers win over volatile ones. * * **Tier 2 — Learned headers** (bootstrapped via fingerprint): * During the first few fingerprinted turns, collect candidate `x-` @@ -265,9 +265,9 @@ export function detectClientType( * Checked in order — first match wins. */ export const KNOWN_SESSION_HEADERS = [ + "x-lore-session-id", // Lore plugins (stable, deterministic) — checked first "x-claude-code-session-id", // Claude Code (UUID, persists for CLI session) - "x-session-affinity", // OpenCode (nanoid, persists for session) - "x-lore-session-id", // Lore plugins (Pi, etc.) 
— injected via registerProvider + "x-session-affinity", // OpenCode (nanoid, volatile — regenerated on restart) ] as const; /** @@ -456,3 +456,61 @@ export function learnHeaders( return { updatedCandidates: currentCandidates, promoted }; } + +// --------------------------------------------------------------------------- +// Tier 1b: Header value rotation detection +// --------------------------------------------------------------------------- + +/** Maximum age (ms) of a session that can be considered a rotation predecessor. */ +export const ROTATION_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours + +/** Information about a candidate predecessor session. */ +export interface RotationCandidate { + /** Internal Lore session ID. */ + sid: string; + /** Whether this session is a sub-agent. */ + isSubagent: boolean; + /** Wall-clock timestamp of the last request/turn (ms since epoch). */ + lastActiveAt: number; +} + +/** + * Find a rotation predecessor: an existing session previously identified via + * the same known header name whose value has changed (e.g. client restart). + * + * Returns the predecessor's session ID and old header value if exactly one + * recent, non-subagent match is found. Returns `null` if zero or multiple + * candidates exist (ambiguous — could be concurrent sessions). + * + * This is a pure function with no side effects — the caller is responsible + * for re-indexing the header mapping and persisting the change. 
+ */ +export function findRotationPredecessor( + headerName: string, + newHeaderValue: string, + headerIndex: ReadonlyMap, + getCandidate: (sid: string) => RotationCandidate | null, + now: number = Date.now(), +): { sid: string; oldHeaderValue: string } | null { + const headerPrefix = headerName + ":"; + const newKey = headerPrefix + newHeaderValue; + let predecessor: { sid: string; oldHeaderValue: string } | null = null; + + for (const [key, sid] of headerIndex) { + if (!key.startsWith(headerPrefix)) continue; + if (key === newKey) continue; // same value — not a rotation + + const candidate = getCandidate(sid); + if (!candidate) continue; // orphaned index entry or not loadable + if (candidate.isSubagent) continue; + if (now - candidate.lastActiveAt > ROTATION_MAX_AGE_MS) continue; + + if (predecessor) { + // Multiple predecessors — ambiguous (concurrent sessions). + return null; + } + predecessor = { sid, oldHeaderValue: key.slice(headerPrefix.length) }; + } + + return predecessor; +} diff --git a/packages/gateway/test/session.test.ts b/packages/gateway/test/session.test.ts index 5c50c269..e2a785ad 100644 --- a/packages/gateway/test/session.test.ts +++ b/packages/gateway/test/session.test.ts @@ -7,6 +7,8 @@ import { scanForMarker, fingerprintMessages, extractKnownSessionHeader, + findRotationPredecessor, + ROTATION_MAX_AGE_MS, isSessionHeaderName, isIdLikeValue, @@ -14,6 +16,7 @@ import { learnHeaders, _resetGlobalHeaderValues, type HeaderCandidate, + type RotationCandidate, } from "../src/session"; // --------------------------------------------------------------------------- @@ -378,6 +381,18 @@ describe("extractKnownSessionHeader", () => { }); }); + test("prefers x-lore-session-id over x-claude-code-session-id and x-session-affinity", () => { + const result = extractKnownSessionHeader({ + "x-lore-session-id": "stable-lore-id", + "x-claude-code-session-id": "claude-uuid", + "x-session-affinity": "opencode-id", + }); + expect(result).toEqual({ + sessionId: 
"stable-lore-id", + headerName: "x-lore-session-id", + }); + }); + test("prefers x-claude-code-session-id over x-session-affinity", () => { const result = extractKnownSessionHeader({ "x-claude-code-session-id": "claude-uuid", @@ -649,3 +664,261 @@ describe("learnHeaders", () => { expect(r3.promoted).toBeNull(); }); }); + +// =========================================================================== +// Tier 1b: Header value rotation detection +// =========================================================================== + +describe("findRotationPredecessor", () => { + const now = Date.now(); + + /** Helper to build a simple header index map. */ + function buildIndex(entries: Array<[string, string, string]>): Map { + const index = new Map(); + for (const [headerName, headerValue, sid] of entries) { + index.set(`${headerName}:${headerValue}`, sid); + } + return index; + } + + /** Helper to build a candidate lookup function. */ + function buildLookup( + candidates: Map, + ): (sid: string) => RotationCandidate | null { + return (sid) => candidates.get(sid) ?? 
null; + } + + test("finds a single predecessor when header value rotates", () => { + const index = buildIndex([ + ["x-session-affinity", "old-nanoid-abc", "lore-session-123"], + ]); + const candidates = new Map([ + ["lore-session-123", { sid: "lore-session-123", isSubagent: false, lastActiveAt: now - 60_000 }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toEqual({ + sid: "lore-session-123", + oldHeaderValue: "old-nanoid-abc", + }); + }); + + test("returns null when no predecessor exists (first session)", () => { + const index = new Map(); + const candidates = new Map(); + + const result = findRotationPredecessor( + "x-session-affinity", + "first-nanoid-abc", + index, + buildLookup(candidates), + now, + ); + + expect(result).toBeNull(); + }); + + test("returns null when multiple predecessors exist (concurrent sessions)", () => { + const index = buildIndex([ + ["x-session-affinity", "nanoid-session-a", "lore-session-A"], + ["x-session-affinity", "nanoid-session-b", "lore-session-B"], + ]); + const candidates = new Map([ + ["lore-session-A", { sid: "lore-session-A", isSubagent: false, lastActiveAt: now - 60_000 }], + ["lore-session-B", { sid: "lore-session-B", isSubagent: false, lastActiveAt: now - 60_000 }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toBeNull(); + }); + + test("skips sub-agent sessions", () => { + const index = buildIndex([ + ["x-session-affinity", "subagent-nanoid", "lore-subagent-1"], + ]); + const candidates = new Map([ + ["lore-subagent-1", { sid: "lore-subagent-1", isSubagent: true, lastActiveAt: now - 60_000 }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toBeNull(); + }); + + test("skips stale 
sessions older than 24 hours", () => { + const staleTime = now - ROTATION_MAX_AGE_MS - 1; + const index = buildIndex([ + ["x-session-affinity", "old-nanoid-abc", "lore-session-stale"], + ]); + const candidates = new Map([ + ["lore-session-stale", { sid: "lore-session-stale", isSubagent: false, lastActiveAt: staleTime }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toBeNull(); + }); + + test("finds predecessor when stale + active sessions exist (stale filtered out)", () => { + const staleTime = now - ROTATION_MAX_AGE_MS - 1; + const index = buildIndex([ + ["x-session-affinity", "stale-nanoid", "lore-session-stale"], + ["x-session-affinity", "active-nanoid", "lore-session-active"], + ]); + const candidates = new Map([ + ["lore-session-stale", { sid: "lore-session-stale", isSubagent: false, lastActiveAt: staleTime }], + ["lore-session-active", { sid: "lore-session-active", isSubagent: false, lastActiveAt: now - 60_000 }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toEqual({ + sid: "lore-session-active", + oldHeaderValue: "active-nanoid", + }); + }); + + test("finds predecessor when subagent + real sessions exist (subagent filtered out)", () => { + const index = buildIndex([ + ["x-session-affinity", "subagent-nanoid", "lore-subagent"], + ["x-session-affinity", "real-nanoid", "lore-session-real"], + ]); + const candidates = new Map([ + ["lore-subagent", { sid: "lore-subagent", isSubagent: true, lastActiveAt: now - 60_000 }], + ["lore-session-real", { sid: "lore-session-real", isSubagent: false, lastActiveAt: now - 60_000 }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toEqual({ + sid: "lore-session-real", + oldHeaderValue: 
"real-nanoid", + }); + }); + + test("ignores entries from different header names", () => { + const index = buildIndex([ + ["x-claude-code-session-id", "claude-uuid", "lore-session-claude"], + ["x-session-affinity", "old-nanoid", "lore-session-opencode"], + ]); + const candidates = new Map([ + ["lore-session-claude", { sid: "lore-session-claude", isSubagent: false, lastActiveAt: now - 60_000 }], + ["lore-session-opencode", { sid: "lore-session-opencode", isSubagent: false, lastActiveAt: now - 60_000 }], + ]); + + // Rotating x-session-affinity should only find the opencode session, not claude + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toEqual({ + sid: "lore-session-opencode", + oldHeaderValue: "old-nanoid", + }); + }); + + test("skips orphaned index entries (candidate lookup returns null)", () => { + const index = buildIndex([ + ["x-session-affinity", "orphaned-nanoid", "lore-session-gone"], + ]); + // No candidates — simulates session not in memory and not in DB + const candidates = new Map(); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toBeNull(); + }); + + test("session just at 24h boundary is still valid", () => { + // Exactly at the boundary (not over) should be included + const justAtBoundary = now - ROTATION_MAX_AGE_MS; + const index = buildIndex([ + ["x-session-affinity", "old-nanoid", "lore-session-boundary"], + ]); + const candidates = new Map([ + ["lore-session-boundary", { sid: "lore-session-boundary", isSubagent: false, lastActiveAt: justAtBoundary }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + // now - justAtBoundary === ROTATION_MAX_AGE_MS, which is NOT > ROTATION_MAX_AGE_MS + expect(result).toEqual({ + sid: "lore-session-boundary", 
+ oldHeaderValue: "old-nanoid", + }); + }); + + test("session 1ms over 24h boundary is stale", () => { + const justOverBoundary = now - ROTATION_MAX_AGE_MS - 1; + const index = buildIndex([ + ["x-session-affinity", "old-nanoid", "lore-session-over"], + ]); + const candidates = new Map([ + ["lore-session-over", { sid: "lore-session-over", isSubagent: false, lastActiveAt: justOverBoundary }], + ]); + + const result = findRotationPredecessor( + "x-session-affinity", + "new-nanoid-xyz", + index, + buildLookup(candidates), + now, + ); + + expect(result).toBeNull(); + }); +}); diff --git a/packages/opencode/src/index.ts b/packages/opencode/src/index.ts index 3145940e..5aba7fe1 100644 --- a/packages/opencode/src/index.ts +++ b/packages/opencode/src/index.ts @@ -249,6 +249,9 @@ export const LorePlugin: Plugin = async (ctx) => { // For local/custom providers, inject the original upstream URL so the // gateway can forward requests to the correct endpoint. "chat.headers": async (input, output) => { + // Inject stable session ID — OpenCode's DB session ID survives restarts, + // unlike x-session-affinity (nanoid regenerated per process). + output.headers["x-lore-session-id"] = input.sessionID; output.headers["x-lore-agent"] = input.agent; // Inject project path so the gateway can attribute data correctly. output.headers["x-lore-project"] = ctx.worktree || ctx.directory;