Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 4 additions & 111 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import { createReflectionEventId } from "./src/reflection-event-store.js";
import { buildReflectionMappedMetadata } from "./src/reflection-mapped-metadata.js";
import { createMemoryCLI } from "./cli.js";
import { isNoise } from "./src/noise-filter.js";
import { normalizeAutoCaptureText } from "./src/auto-capture-cleanup.js";

// Import smart extraction & lifecycle components
import { SmartExtractor } from "./src/smart-extractor.js";
Expand Down Expand Up @@ -655,65 +656,10 @@ function shouldSkipReflectionMessage(role: string, text: string): boolean {
return false;
}

/**
 * Header lines that introduce an injected inbound-metadata block.
 * `isAutoCaptureInboundMetaSentinelLine` compares a trimmed line against these
 * entries by exact equality, and `stripLeadingInboundMetadata` uses a substring
 * check against them as a fast path before doing any line-by-line work.
 */
const AUTO_CAPTURE_INBOUND_META_SENTINELS = [
"Conversation info (untrusted metadata):",
"Sender (untrusted metadata):",
"Thread starter (untrusted, for context):",
"Replied message (untrusted, for context):",
"Forwarded message context (untrusted metadata):",
"Chat history since last reply (untrusted, for context):",
] as const;

/**
 * Opening text of the session-reset notice. `stripAutoCaptureSessionResetPrefix`
 * tests for it with `startsWith` on the trimmed message.
 */
const AUTO_CAPTURE_SESSION_RESET_PREFIX =
"A new session was started via /new or /reset. Execute your Session Startup sequence now";
/**
 * Matches one addressing token at the very start of a string — either a
 * `<@digits>` / `<@!digits>` mention or an `@handle` made of letters, digits,
 * `_`, `.` and `-` — together with any whitespace that follows it.
 */
const AUTO_CAPTURE_ADDRESSING_PREFIX_RE = /^(?:<@!?[0-9]+>|@[A-Za-z0-9_.-]+)\s*/;
// NOTE(review): presumably the size cap handed to pruneMapIfOver for the
// auto-capture maps — the call site is not visible in this excerpt; confirm.
const AUTO_CAPTURE_MAP_MAX_ENTRIES = 2000;
/**
 * Matches a string that consists solely of an optional polite prefix, one of
 * the listed "remember this" phrases, and optional trailing punctuation.
 * Anchored at both ends, so any additional content makes the match fail.
 */
const AUTO_CAPTURE_EXPLICIT_REMEMBER_RE =
/^(?:请|請)?(?:记住|記住|记一下|記一下|别忘了|別忘了)[。.!??!]*$/u;

/**
 * Reports whether `line`, ignoring surrounding whitespace, is exactly one of
 * the inbound-metadata sentinel headers.
 *
 * @param line - A single line of message text.
 * @returns `true` when the trimmed line equals a sentinel, otherwise `false`.
 */
function isAutoCaptureInboundMetaSentinelLine(line: string): boolean {
  const candidate = line.trim();
  for (const sentinel of AUTO_CAPTURE_INBOUND_META_SENTINELS) {
    if (candidate === sentinel) {
      return true;
    }
  }
  return false;
}

/**
 * Removes inbound-metadata blocks from the start of a message.
 *
 * A block is a sentinel header line immediately followed by a fenced
 * "```json" section. Consecutive blocks (optionally separated by blank lines)
 * are all consumed; whatever follows is returned trimmed.
 *
 * @param text - Message text that may begin with metadata blocks.
 * @returns The text after the leading metadata blocks, trimmed. The input is
 *   returned untouched when it is empty, contains no sentinel at all, or when
 *   a sentinel line is not followed by a "```json" fence.
 */
function stripLeadingInboundMetadata(text: string): string {
// Fast path: nothing to do when no sentinel appears anywhere in the text.
if (!text || !AUTO_CAPTURE_INBOUND_META_SENTINELS.some((sentinel) => text.includes(sentinel))) {
return text;
}

const lines = text.split("\n");
// `index` is the cursor of the first line that has not been consumed yet.
let index = 0;
// Skip blank lines before the first potential sentinel.
while (index < lines.length && lines[index].trim() === "") {
index++;
}

// Consume every consecutive "sentinel + fenced json" block.
while (index < lines.length && isAutoCaptureInboundMetaSentinelLine(lines[index])) {
index++;
if (index < lines.length && lines[index].trim() === "```json") {
index++;
// Advance to the closing fence. If the fence is never closed the cursor
// reaches the end of the input, so the function ends up returning "".
while (index < lines.length && lines[index].trim() !== "```") {
index++;
}
if (index < lines.length && lines[index].trim() === "```") {
index++;
}
} else {
// Sentinel line not followed by a ```json fenced block — unexpected format.
// Log and return original text to avoid lossy stripping.
// Note: `index` was already advanced past the sentinel when it is logged.
_autoCaptureDebugLog(
`memory-lancedb-pro: stripLeadingInboundMetadata: sentinel line not followed by json fenced block at line ${index}, returning original text`,
);
return text;
}

// Skip blank lines between this block and whatever comes next.
while (index < lines.length && lines[index].trim() === "") {
index++;
}
}

return lines.slice(index).join("\n").trim();
}

/**
* Prune a Map to stay within the given maximum number of entries.
* Deletes the oldest (earliest-inserted) keys when over the limit.
Expand All @@ -728,28 +674,6 @@ function pruneMapIfOver<K, V>(map: Map<K, V>, maxEntries: number): void {
}
}

/**
 * Drops the session-reset notice from the front of a message.
 *
 * @param text - Raw message text.
 * @returns The trimmed text when it does not start with the reset notice.
 *   Otherwise everything after the first blank line, trimmed; or, when there
 *   is no blank line, everything after the first two lines, trimmed (an empty
 *   string if there are two lines or fewer).
 */
function stripAutoCaptureSessionResetPrefix(text: string): string {
  const body = text.trim();
  if (body.startsWith(AUTO_CAPTURE_SESSION_RESET_PREFIX)) {
    // Prefer cutting at the first paragraph break after the notice.
    const paragraphBreak = body.indexOf("\n\n");
    if (paragraphBreak !== -1) {
      return body.substring(paragraphBreak + 2).trim();
    }

    // No blank line: treat the first two lines as the notice.
    const [, , ...remaining] = body.split("\n");
    return remaining.length > 0 ? remaining.join("\n").trim() : "";
  }
  return body;
}

/**
 * Removes a single leading mention / `@handle` token from the text.
 *
 * @param text - Message text, expected to start at the addressing token.
 * @returns The text without the leading token, trimmed on both sides.
 */
function stripAutoCaptureAddressingPrefix(text: string): string {
  const withoutMention = text.replace(AUTO_CAPTURE_ADDRESSING_PREFIX_RE, "");
  return withoutMention.trim();
}

/**
 * Tells whether the message is nothing but an explicit "remember this"
 * command, as defined by `AUTO_CAPTURE_EXPLICIT_REMEMBER_RE`.
 *
 * @param text - Message text; surrounding whitespace is ignored.
 * @returns `true` when the trimmed text matches the pattern in full.
 */
function isExplicitRememberCommand(text: string): boolean {
  const candidate = text.trim();
  return AUTO_CAPTURE_EXPLICIT_REMEMBER_RE.test(candidate);
}
Expand Down Expand Up @@ -779,34 +703,6 @@ function buildAutoCaptureConversationKeyFromSessionKey(sessionKey: string): stri
return suffix || null;
}

/**
 * Strips injected leading scaffolding from a user message.
 *
 * Non-user messages are only trimmed. For user messages the following are
 * removed, in order, from the start of the text: a `<relevant-memories>`
 * block, an `[UNTRUSTED DATA ...] ... [END UNTRUSTED DATA]` block, the
 * session-reset notice, leading inbound-metadata blocks, and an addressing
 * prefix.
 *
 * @param role - Message role; only `"user"` triggers stripping.
 * @param text - Raw message text.
 * @returns The cleaned, trimmed text.
 */
function stripAutoCaptureInjectedPrefix(role: string, text: string): string {
  const trimmed = text.trim();
  if (role !== "user") {
    return trimmed;
  }

  const withoutMemories = trimmed.replace(
    /^<relevant-memories>\s*[\s\S]*?<\/relevant-memories>\s*/i,
    "",
  );
  const withoutUntrusted = withoutMemories.replace(
    /^\[UNTRUSTED DATA[^\n]*\][\s\S]*?\[END UNTRUSTED DATA\]\s*/i,
    "",
  );
  const withoutReset = stripAutoCaptureSessionResetPrefix(withoutUntrusted);
  const withoutMetadata = stripLeadingInboundMetadata(withoutReset);
  return stripAutoCaptureAddressingPrefix(withoutMetadata).trim();
}

/**
 * Module-level debug logger for auto-capture helpers.
 * Starts out as a no-op so the helpers can be called before any logger
 * exists; it is reassigned during plugin registration.
 */
let _autoCaptureDebugLog: (msg: string) => void = () => { };

/**
 * Produces the auto-capture form of a message, or `null` when the message
 * should not be captured.
 *
 * @param role - Message role; anything that is not a string yields `null`.
 * @param text - Raw message text.
 * @returns The cleaned text, or `null` when the role is not a string, the
 *   cleaned text is empty, or `shouldSkipReflectionMessage` rejects it.
 */
function normalizeAutoCaptureText(role: unknown, text: string): string | null {
  if (typeof role === "string") {
    const cleaned = stripAutoCaptureInjectedPrefix(role, text);
    // Short-circuit keeps the skip predicate from running on empty text.
    const keep = cleaned !== "" && !shouldSkipReflectionMessage(role, cleaned);
    return keep ? cleaned : null;
  }
  return null;
}

function redactSecrets(text: string): string {
const patterns: RegExp[] = [
/Bearer\s+[A-Za-z0-9\-._~+/]+=*/g,
Expand Down Expand Up @@ -1886,9 +1782,6 @@ const memoryLanceDBProPlugin = {
const autoCapturePendingIngressTexts = new Map<string, string[]>();
const autoCaptureRecentTexts = new Map<string, string[]>();

// Wire up the module-level debug logger for pure helper functions.
_autoCaptureDebugLog = (msg: string) => api.logger.debug(msg);

api.logger.info(
`memory-lancedb-pro@${pluginVersion}: plugin registered (db: ${resolvedDbPath}, model: ${config.embedding.model || "text-embedding-3-small"}, smartExtraction: ${smartExtractor ? 'ON' : 'OFF'})`
);
Expand All @@ -1899,7 +1792,7 @@ const memoryLanceDBProPlugin = {
ctx.channelId,
ctx.conversationId,
);
const normalized = normalizeAutoCaptureText("user", event.content);
const normalized = normalizeAutoCaptureText("user", event.content, shouldSkipReflectionMessage);
if (conversationKey && normalized) {
const queue = autoCapturePendingIngressTexts.get(conversationKey) || [];
queue.push(normalized);
Expand Down Expand Up @@ -2127,7 +2020,7 @@ const memoryLanceDBProPlugin = {
const content = msgObj.content;

if (typeof content === "string") {
const normalized = normalizeAutoCaptureText(role, content);
const normalized = normalizeAutoCaptureText(role, content, shouldSkipReflectionMessage);
if (!normalized) {
skippedAutoCaptureTexts++;
} else {
Expand All @@ -2147,7 +2040,7 @@ const memoryLanceDBProPlugin = {
typeof (block as Record<string, unknown>).text === "string"
) {
const text = (block as Record<string, unknown>).text as string;
const normalized = normalizeAutoCaptureText(role, text);
const normalized = normalizeAutoCaptureText(role, text, shouldSkipReflectionMessage);
if (!normalized) {
skippedAutoCaptureTexts++;
} else {
Expand Down
94 changes: 94 additions & 0 deletions src/auto-capture-cleanup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
 * Header lines that introduce an injected inbound-metadata block. They are
 * regex-escaped and joined into `AUTO_CAPTURE_INBOUND_META_BLOCK_RE`.
 */
const AUTO_CAPTURE_INBOUND_META_SENTINELS = [
"Conversation info (untrusted metadata):",
"Sender (untrusted metadata):",
"Thread starter (untrusted, for context):",
"Replied message (untrusted, for context):",
"Forwarded message context (untrusted metadata):",
"Chat history since last reply (untrusted, for context):",
] as const;

/**
 * Opening text of the session-reset notice. `stripAutoCaptureSessionResetPrefix`
 * tests for it with `startsWith` on the trimmed message.
 */
const AUTO_CAPTURE_SESSION_RESET_PREFIX =
"A new session was started via /new or /reset. Execute your Session Startup sequence now";
/**
 * Matches one addressing token at the very start of a string — either a
 * `<@digits>` / `<@!digits>` mention or an `@handle` made of letters, digits,
 * `_`, `.` and `-` — together with any whitespace that follows it.
 */
const AUTO_CAPTURE_ADDRESSING_PREFIX_RE = /^(?:<@!?[0-9]+>|@[A-Za-z0-9_.-]+)\s*/;
/**
 * Matches, on any line (multiline, case-insensitive, global), text that starts
 * with `System:`, then a bracketed segment, then `Exec completed`, `Exec failed`
 * or `Exec started`, through the end of that line.
 */
const AUTO_CAPTURE_SYSTEM_EVENT_LINE_RE = /^System:\s*\[[^\n]*?\]\s*Exec\s+(?:completed|failed|started)\b.*$/gim;

/**
 * Escapes every regular-expression metacharacter in `value` so the result can
 * be embedded in a pattern and match the input literally.
 *
 * @param value - Arbitrary text.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` preceded by a
 *   backslash.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  const escapedMatch = "\\$&";
  return value.replace(metaCharacters, escapedMatch);
}

/**
 * Matches one inbound-metadata block anywhere in a message: at the start of
 * the text or right after a newline, optional whitespace, one of the sentinel
 * header lines (regex-escaped), then a "```json" fenced section up to and
 * including its closing fence and any trailing whitespace.
 * The fence body is matched lazily, so the match ends at the first closing
 * fence. The pattern is global, so `replace` removes every occurrence.
 */
const AUTO_CAPTURE_INBOUND_META_BLOCK_RE = new RegExp(
String.raw`(?:^|\n)\s*(?:${AUTO_CAPTURE_INBOUND_META_SENTINELS.map((sentinel) => escapeRegExp(sentinel)).join("|")})\s*\n\`\`\`json[\s\S]*?\n\`\`\`\s*`,
"g",
);

/**
 * Removes system-event lines and inbound-metadata blocks from a message.
 *
 * Each pass replaces every system-event line and every metadata block with a
 * newline, collapses runs of three or more newlines to two, and trims. Passes
 * repeat until the text stops changing, up to six times.
 *
 * @param text - Message text; an empty string is returned as-is.
 * @returns The cleaned, trimmed text.
 */
function stripLeadingInboundMetadata(text: string): string {
  if (!text) return text;

  let current = text;
  let passesLeft = 6;
  while (passesLeft-- > 0) {
    // Compare against the trimmed pre-pass text so whitespace-only
    // differences do not count as progress.
    const previous = current.trim();
    current = current
      .replace(AUTO_CAPTURE_SYSTEM_EVENT_LINE_RE, "\n")
      .replace(AUTO_CAPTURE_INBOUND_META_BLOCK_RE, "\n")
      .replace(/\n{3,}/g, "\n\n")
      .trim();
    if (current === previous) break;
  }

  return current.trim();
}

/**
 * Drops the session-reset notice from the front of a message.
 *
 * @param text - Raw message text.
 * @returns The trimmed text when it does not start with the reset notice.
 *   Otherwise everything after the first blank line, trimmed; or, when there
 *   is no blank line, everything after the first two lines, trimmed (an empty
 *   string if there are two lines or fewer).
 */
function stripAutoCaptureSessionResetPrefix(text: string): string {
  const message = text.trim();
  const hasResetNotice = message.startsWith(AUTO_CAPTURE_SESSION_RESET_PREFIX);
  if (!hasResetNotice) {
    return message;
  }

  // Prefer cutting at the first paragraph break after the notice.
  const gapAt = message.indexOf("\n\n");
  if (gapAt !== -1) {
    return message.substring(gapAt + 2).trim();
  }

  // No blank line: treat the first two lines as the notice.
  const [, , ...afterNotice] = message.split("\n");
  return afterNotice.length === 0 ? "" : afterNotice.join("\n").trim();
}

/**
 * Removes a single leading mention / `@handle` token from the text.
 *
 * @param text - Message text, expected to start at the addressing token.
 * @returns The text without the leading token, trimmed on both sides.
 */
function stripAutoCaptureAddressingPrefix(text: string): string {
  const unaddressed = text.replace(AUTO_CAPTURE_ADDRESSING_PREFIX_RE, "");
  return unaddressed.trim();
}

/**
 * Strips injected scaffolding from a user message.
 *
 * Non-user messages are only trimmed. For user messages, every
 * `<relevant-memories>` block and every
 * `[UNTRUSTED DATA ...] ... [END UNTRUSTED DATA]` block is removed, then the
 * session-reset notice, inbound metadata, an addressing prefix, and inbound
 * metadata once more (for blocks that followed the addressing prefix).
 * Finally, runs of three or more newlines are collapsed to two.
 *
 * @param role - Message role; only `"user"` triggers stripping.
 * @param text - Raw message text.
 * @returns The cleaned, trimmed text.
 */
export function stripAutoCaptureInjectedPrefix(role: string, text: string): string {
  const trimmed = text.trim();
  if (role !== "user") {
    return trimmed;
  }

  const withoutInjectedBlocks = trimmed
    .replace(/<relevant-memories>\s*[\s\S]*?<\/relevant-memories>\s*/gi, "")
    .replace(/\[UNTRUSTED DATA[^\n]*\][\s\S]*?\[END UNTRUSTED DATA\]\s*/gi, "");

  // Order matters: each step operates on the output of the previous one.
  const steps: Array<(input: string) => string> = [
    stripAutoCaptureSessionResetPrefix,
    stripLeadingInboundMetadata,
    stripAutoCaptureAddressingPrefix,
    stripLeadingInboundMetadata,
  ];
  let result = withoutInjectedBlocks;
  for (const step of steps) {
    result = step(result);
  }

  return result.replace(/\n{3,}/g, "\n\n").trim();
}

/**
 * Produces the auto-capture form of a message, or `null` when the message
 * should not be captured.
 *
 * @param role - Message role; anything that is not a string yields `null`.
 * @param text - Raw message text.
 * @param shouldSkipMessage - Optional predicate called with the role and the
 *   cleaned text; returning `true` rejects the message. It is not called when
 *   the cleaned text is empty.
 * @returns The cleaned text, or `null` when the role is not a string, the
 *   cleaned text is empty, or the predicate rejects it.
 */
export function normalizeAutoCaptureText(
  role: unknown,
  text: string,
  shouldSkipMessage?: (role: string, text: string) => boolean,
): string | null {
  if (typeof role === "string") {
    const cleaned = stripAutoCaptureInjectedPrefix(role, text);
    if (cleaned && !shouldSkipMessage?.(role, cleaned)) {
      return cleaned;
    }
  }
  return null;
}
Loading
Loading