Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 33 additions & 23 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -709,36 +709,45 @@ function stripLeadingInboundMetadata(text: string): string {
}

const lines = text.split("\n");
const outputLines: string[] = [];
let index = 0;
while (index < lines.length && lines[index].trim() === "") {
index++;
}

while (index < lines.length && isAutoCaptureInboundMetaSentinelLine(lines[index])) {
index++;
if (index < lines.length && lines[index].trim() === "```json") {
while (index < lines.length) {
if (isAutoCaptureInboundMetaSentinelLine(lines[index])) {
// Found a sentinel line — try to consume it and its ```json block.
const sentinelStart = index;
index++;
while (index < lines.length && lines[index].trim() !== "```") {
index++;
}
if (index < lines.length && lines[index].trim() === "```") {
if (index < lines.length && lines[index].trim() === "```json") {
index++;
while (index < lines.length && lines[index].trim() !== "```") {
index++;
}
if (index < lines.length && lines[index].trim() === "```") {
index++;
}
// Skip trailing blank lines after the metadata block
while (index < lines.length && lines[index].trim() === "") {
index++;
}
// Successfully consumed a sentinel + JSON block — continue scanning.
continue;
} else {
// Sentinel line not followed by a ```json fenced block — unexpected format.
// Log and keep the sentinel line to avoid lossy stripping.
_autoCaptureDebugLog(
`memory-lancedb-pro: stripLeadingInboundMetadata: sentinel line not followed by json fenced block at line ${sentinelStart}, keeping it`,
);
// Push the sentinel line we consumed and continue from current index
outputLines.push(lines[sentinelStart]);
continue;
}
} else {
// Sentinel line not followed by a ```json fenced block — unexpected format.
// Log and return original text to avoid lossy stripping.
_autoCaptureDebugLog(
`memory-lancedb-pro: stripLeadingInboundMetadata: sentinel line not followed by json fenced block at line ${index}, returning original text`,
);
return text;
}

while (index < lines.length && lines[index].trim() === "") {
index++;
}
outputLines.push(lines[index]);
index++;
}

return lines.slice(index).join("\n").trim();
return outputLines.join("\n").trim();
}

/**
Expand Down Expand Up @@ -1307,8 +1316,9 @@ const CAPTURE_EXCLUDE_PATTERNS = [
export function shouldCapture(text: string): boolean {
let s = text.trim();

// Strip OpenClaw metadata headers (Conversation info or Sender)
const metadataPattern = /^(Conversation info|Sender) \(untrusted metadata\):[\s\S]*?\n\s*\n/gim;
// Strip OpenClaw metadata headers (all 6 sentinel labels).
// Some sentinels use "(untrusted metadata):" and others use "(untrusted, for context):".
const metadataPattern = /^(Conversation info|Sender|Thread starter|Replied message|Forwarded message context|Chat history since last reply) \(untrusted(?:,? (?:metadata|for context))\):[\s\S]*?\n\s*\n/gim;
s = s.replace(metadataPattern, "");

// CJK characters carry more meaning per character, use lower minimum threshold
Expand Down
6 changes: 5 additions & 1 deletion src/smart-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ import {
* - "System: [YYYY-MM-DD HH:MM:SS GMT+N] Channel[account] ..." header lines
* - "Conversation info (untrusted metadata):" + JSON code blocks
* - "Sender (untrusted metadata):" + JSON code blocks
* - "Thread starter (untrusted, for context):" + JSON code blocks
* - "Replied message (untrusted, for context):" + JSON code blocks
* - "Forwarded message context (untrusted metadata):" + JSON code blocks
* - "Chat history since last reply (untrusted, for context):" + JSON code blocks
* - Standalone JSON blocks containing message_id/sender_id fields
*/
export function stripEnvelopeMetadata(text: string): string {
Expand All @@ -76,8 +79,9 @@ export function stripEnvelopeMetadata(text: string): string {

// 2. Strip labeled metadata sections with their JSON code blocks
// e.g. "Conversation info (untrusted metadata):\n```json\n{...}\n```"
// All 6 sentinel labels from AUTO_CAPTURE_INBOUND_META_SENTINELS:
cleaned = cleaned.replace(
/(?:Conversation info|Sender|Replied message)\s*\(untrusted[^)]*\):\s*```json\s*\{[\s\S]*?\}\s*```/g,
/(?:Conversation info|Sender|Thread starter|Replied message|Forwarded message context|Chat history since last reply)\s*\(untrusted[^)]*\):\s*```json\s*\{[\s\S]*?\}\s*```/g,
"",
);

Expand Down
Loading