Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 21 additions & 45 deletions .lore.md

Large diffs are not rendered by default.

128 changes: 75 additions & 53 deletions packages/core/eval/baselines.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,56 +121,85 @@ Conversation to summarize:
/**
* Simulate compaction: LLM-summarize the prefix that falls outside
* the tail window, then return summary + tail.
*
* Iterative: when the total exceeds `compactionThreshold`, compact the prefix
* and check again. Real tools (Claude Code) auto-compact at ~83.5% of the
* context window, and a 400K session triggers 2-3 compaction cycles. Each
* cycle replaces the prefix with a summary, losing more detail.
*/
export async function compactionBaseline(
turns: ConversationTurn[],
tailBudgetTokens: number = 80_000,
llm: EvalLLMClient,
modelContextWindow: number = 200_000,
): Promise<string> {
const total = totalTokens(turns);

// If everything fits, no compaction needed
if (total <= tailBudgetTokens) {
return renderConversation(turns);
}

// Find the tail window cutoff
let tailTokens = 0;
let cutoff = turns.length;
for (let i = turns.length - 1; i >= 0; i--) {
const turnTokens =
turns[i].tokens ?? estimateTokens(renderTurn(turns[i]));
if (tailTokens + turnTokens > tailBudgetTokens) {
cutoff = i + 1;
break;
// Match Claude Code's autoCompactThreshold: effectiveContextWindow * 0.835
const compactionThreshold = Math.floor(
(modelContextWindow - Math.min(32_000, modelContextWindow * 0.15)) * 0.835,
);
const maxCompactions = 4; // safety cap
let currentTurns = turns;
let compactionCount = 0;

while (compactionCount < maxCompactions) {
const total = totalTokens(currentTurns);

// Stop once the conversation fits: the compaction threshold is only
// consulted after at least one pass, while the tail budget is checked on every pass.
if (compactionCount > 0 && total <= compactionThreshold) break;
if (total <= tailBudgetTokens) break;

// Find the tail window cutoff
let tailTokens = 0;
let cutoff = currentTurns.length;
for (let i = currentTurns.length - 1; i >= 0; i--) {
const turnTokens =
currentTurns[i].tokens ?? estimateTokens(renderTurn(currentTurns[i]));
if (tailTokens + turnTokens > tailBudgetTokens) {
cutoff = i + 1;
break;
}
tailTokens += turnTokens;
if (i === 0) cutoff = 0;
}
tailTokens += turnTokens;
if (i === 0) cutoff = 0;
}

const prefix = turns.slice(0, cutoff);
const tail = turns.slice(cutoff);

if (prefix.length === 0) {
return renderConversation(tail);
const prefix = currentTurns.slice(0, cutoff);
const tail = currentTurns.slice(cutoff);

if (prefix.length === 0) break;

// Summarize the prefix via LLM
const prefixText = renderConversation(prefix);
const userPrompt = COMPACTION_USER_TEMPLATE.replace(
"{{conversation}}",
prefixText,
);

const result = await llm.prompt(COMPACTION_SYSTEM, userPrompt, {
maxTokens: 4096,
temperature: 0,
});

// Replace prefix with a synthetic summary turn + keep tail
const summaryTurn: ConversationTurn = {
role: "assistant",
content: [{ type: "text", text: `## Compacted Summary (pass ${compactionCount + 1})\n\n${result.text}` }],
tokens: estimateTokens(result.text),
};
currentTurns = [summaryTurn, ...tail];
compactionCount++;

console.log(
` [compaction] pass ${compactionCount}: ${prefix.length} turns summarized → ${estimateTokens(result.text)} tok, ${currentTurns.length} turns remaining (${totalTokens(currentTurns)} tok)`,
);
}

// Summarize the prefix via LLM
const prefixText = renderConversation(prefix);
const userPrompt = COMPACTION_USER_TEMPLATE.replace(
"{{conversation}}",
prefixText,
);

const result = await llm.prompt(COMPACTION_SYSTEM, userPrompt, {
maxTokens: 4096,
temperature: 0,
});
// Final render
if (compactionCount === 0) {
return renderConversation(currentTurns);
}

return (
`## Compacted Summary of Earlier Conversation\n\n${result.text}\n\n` +
`---\n\n## Recent Conversation\n\n${renderConversation(tail)}`
);
return renderConversation(currentTurns);
}

// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -226,26 +255,19 @@ export function memoryOnlyConfigOverrides(): Record<string, unknown> {
export function buildQAPrompt(
context: string,
question: string,
mode: "baseline" | "lore",
_mode: "baseline" | "lore",
): string {
const preamble =
mode === "lore"
? "Here are distilled observations and knowledge from a coding session. " +
"If the observations don't have enough detail, use the recall tool to search for it."
: "Here is context from a past coding session.";

return (
`${preamble}\n\n${context}\n\n` +
`Here is context from a past coding session.\n\n${context}\n\n` +
`Question: ${question}\n\n` +
`Answer concisely and specifically. Include exact values, file paths, and names when known.`
);
}

export const QA_SYSTEM =
"You are answering questions about past coding sessions. " +
"You have a recall tool available — USE IT to search your memory for specific details " +
"(file paths, branch names, error messages, version numbers, test counts, etc.). " +
"Always invoke recall before answering unless the answer is already in your system context. " +
"When recall returns results with source IDs (t:xxx), you can recall those IDs to get " +
"the full original message with exact details. " +
"Be specific and factual. If you don't have enough information even after recall, say so.";
"Do your best to come up with the exact and correct answer. " +
"Use all the tools available to you to find it. " +
"Be specific and factual — include exact file paths, error messages, " +
"version numbers, and names when known. " +
"If you don't have enough information, say so.";
17 changes: 17 additions & 0 deletions packages/core/eval/harness.ts
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,23 @@ export async function runScenario(
}
}

// Backfill embeddings for distillations and temporal messages created
// during replay. The startup backfill runs before any content exists;
// this ensures vector search works for QA questions.
if (gateway.isReal !== false) {
try {
const { embedding } = await import("@loreai/core");
const kn = await embedding.backfillEmbeddings();
const dist = await embedding.backfillDistillationEmbeddings();
console.log(
` [embedding] post-replay backfill: ${kn} knowledge, ${dist} distillations` +
` (available=${embedding.isAvailable()})`,
);
} catch (err) {
console.warn(" Warning: post-replay embedding backfill failed:", err);
}
}

// Collect all turns across sessions for baseline context building
const allTurns = scenario.sessions.flatMap((s) => s.turns);

Expand Down
16 changes: 8 additions & 8 deletions packages/core/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,9 @@ export const LoreConfig = z.object({
* Default: 1.5. Set to 1.0 to disable. */
vectorBoostWeight: z.number().min(1).max(5).default(1.5),
/** Minimum meaningful query terms (after stopword removal) to activate
* vector boost. Short keyword queries (1-2 terms) are left unweighted
* since BM25 excels there. Default: 3. */
vectorBoostMinTerms: z.number().min(1).max(10).default(3),
* vector boost. Single-term queries are left unweighted since BM25
* excels there. Default: 2. */
vectorBoostMinTerms: z.number().min(1).max(10).default(2),
/** Vector embedding search.
* Supports multiple providers:
* - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5, no API key needed.
Expand Down Expand Up @@ -184,25 +184,25 @@ export const LoreConfig = z.object({
recall: z
.object({
/** Total character budget for recall output. Controls how much context the
* recall results consume. ~2K tokens at 8000 chars. Default: 8000. */
charBudget: z.number().min(2000).max(20000).default(8000),
* recall results consume. ~3K tokens at 12000 chars. Default: 12000. */
charBudget: z.number().min(2000).max(20000).default(12000),
/** Minimum RRF score relative to top result. Results below
* topScore * relevanceFloor are dropped. Default: 0.15.
* Set to 0 to disable score-based cutoff. */
relevanceFloor: z.number().min(0).max(1).default(0.15),
/** Max results to show in recall output. Default: 15. */
maxResults: z.number().min(3).max(30).default(15),
})
.default({ charBudget: 8000, relevanceFloor: 0.15, maxResults: 15 }),
.default({ charBudget: 12000, relevanceFloor: 0.15, maxResults: 15 }),
})
.default({
ftsWeights: { title: 6.0, content: 2.0, category: 3.0 },
recallLimit: 10,
queryExpansion: true,
vectorBoostWeight: 1.5,
vectorBoostMinTerms: 3,
vectorBoostMinTerms: 2,
embeddings: { enabled: true, provider: "local" as const, model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
recall: { charBudget: 8000, relevanceFloor: 0.15, maxResults: 15 },
recall: { charBudget: 12000, relevanceFloor: 0.15, maxResults: 15 },
}),
cache: z
.object({
Expand Down
57 changes: 52 additions & 5 deletions packages/core/src/gradient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,9 @@ type Distillation = {
token_count: number;
created_at: number;
session_id: string;
r_compression: number | null;
c_norm: number | null;
source_ids: string[];
};

// Load non-archived distillations for the in-context prefix.
Expand All @@ -694,12 +697,16 @@ function loadDistillations(
): Distillation[] {
const pid = ensureProject(projectPath);
const query = sessionID
? "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC"
: "SELECT id, observations, generation, token_count, created_at, session_id FROM distillations WHERE project_id = ? AND archived = 0 ORDER BY created_at ASC";
? "SELECT id, observations, generation, token_count, created_at, session_id, r_compression, c_norm, source_ids FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC"
: "SELECT id, observations, generation, token_count, created_at, session_id, r_compression, c_norm, source_ids FROM distillations WHERE project_id = ? AND archived = 0 ORDER BY created_at ASC";
const params = sessionID ? [pid, sessionID] : [pid];
return db()
const rows = db()
.query(query)
.all(...params) as Distillation[];
.all(...params) as Array<Omit<Distillation, "source_ids"> & { source_ids: string }>;
return rows.map((r) => ({
...r,
source_ids: r.source_ids ? JSON.parse(r.source_ids) : [],
}));
}

// Cached distillation loader — avoids hitting the DB on every transform() call.
Expand Down Expand Up @@ -1480,8 +1487,21 @@ function tryFitStable(input: {
const windowSize = rawWindowCache!.pinnedRawCount + newMessages;
const pinnedIdx = Math.max(0, input.messages.length - windowSize);

// Ensure the pinned window starts with a user message when a prefix is
// present — the prefix ends with assistant so a leading assistant in the
// raw window would create back-to-back assistants (#424).
let adjustedPinnedIdx = pinnedIdx;
if (input.prefix.length > 0) {
while (
adjustedPinnedIdx < input.messages.length &&
input.messages[adjustedPinnedIdx].info.role === "assistant"
) {
adjustedPinnedIdx++;
}
}

// Measure the token cost of the pinned window.
const pinnedWindow = input.messages.slice(pinnedIdx);
const pinnedWindow = input.messages.slice(adjustedPinnedIdx);
const pinnedTokens = pinnedWindow.reduce(
(sum, m) => sum + estimateMessage(m),
0,
Expand Down Expand Up @@ -1961,6 +1981,17 @@ function transformInner(input: {
olderTokens += est;
}

// Ensure role alternation at the prefix/raw boundary: drop leading assistant
// messages from the older tail so the raw window starts with user (#424).
while (
olderMessages.length > 0 &&
nuclearPrefix.length > 0 &&
olderMessages[0].info.role === "assistant"
) {
olderTokens -= estimateMessage(olderMessages[0]);
olderMessages.shift();
}

const nuclearRaw = [...olderMessages, ...currentTurn];
const nuclearRawTokens = olderTokens + currentTurnTokens;

Expand Down Expand Up @@ -2134,6 +2165,22 @@ function tryFit(input: {
if (i === 0) cutoff = 0;
}

// Ensure role alternation at the prefix/raw boundary: the distilled prefix
// ends with an assistant message, so the raw window must start with a user.
// The backward budget scan is purely token-based and can land on any role.
// If the cutoff produces a raw window starting with assistant(s), advance it
// past them — otherwise loreMessagesToGateway produces back-to-back assistants
// and the API rejects with "tool_use ids found without tool_result" (#424).
if (input.prefix.length > 0) {
while (
cutoff < olderMessages.length &&
olderMessages[cutoff].info.role === "assistant"
) {
olderTokens -= estimateMessage(olderMessages[cutoff]);
cutoff++;
}
}

const rawMessages = [...olderMessages.slice(cutoff), ...currentTurn];
const rawTokens = olderTokens + currentTurnTokens;

Expand Down
29 changes: 27 additions & 2 deletions packages/core/src/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -478,10 +478,16 @@ Produce update/delete ops to reduce entry count to at most ${input.targetMax}. P

// Format distillations for injection into the message context.
// Observations are plain event-log text — inject them directly under a header.
// Optional metadata (id, r_compression, source_ids) adds drill-down hints so
// the model knows how lossy each distillation is and can use recall to fetch
// the full original messages.
export function formatDistillations(
distillations: Array<{
observations: string;
generation: number;
id?: string;
r_compression?: number | null;
source_ids?: string[];
}>,
): string {
if (!distillations.length) return "";
Expand All @@ -493,20 +499,39 @@ export function formatDistillations(
if (meta.length) {
sections.push("### Earlier Work (summarized)");
for (const d of meta) {
sections.push(d.observations.trim());
sections.push(formatOneDistillation(d));
}
}

if (recent.length) {
sections.push("### Recent Work (distilled)");
for (const d of recent) {
sections.push(d.observations.trim());
sections.push(formatOneDistillation(d));
}
}

return sections.join("\n\n");
}

/**
 * Render one distillation as text, optionally preceded by a metadata line.
 *
 * Without a truthy `id`, the result is just the trimmed observations.
 * Otherwise the trimmed observations are preceded by a single parenthesised
 * line of ` | `-separated tags:
 *   - `d:<id>` — always present;
 *   - `lossy` — only when `r_compression` is non-null and below 1.0;
 *   - `<n> source` / `<n> sources` — only when `source_ids` is non-empty.
 *
 * @param d - Distillation fields to render.
 * @returns The formatted text block.
 */
function formatOneDistillation(d: {
observations: string;
id?: string;
r_compression?: number | null;
source_ids?: string[];
}): string {
const body = d.observations.trim();
if (!d.id) return body;

// Build the tag list incrementally; the id tag always comes first.
const tags: string[] = [`d:${d.id}`];

if (d.r_compression != null && d.r_compression < 1.0) {
tags.push("lossy");
}

const sourceTotal = d.source_ids?.length ?? 0;
if (sourceTotal > 0) {
// Pluralise only when there is more than one source.
tags.push(sourceTotal > 1 ? `${sourceTotal} sources` : `${sourceTotal} source`);
}

return `(${tags.join(" | ")})\n${body}`;
}

// Strict Markdown skeleton for the /compact session summary. Task-oriented
// sections so the next agent starting from the compacted context has a clear
// "where am I, what's next, what's blocked" briefing. Derived from upstream
Expand Down
Loading
Loading