From aa84960108b57274dc4322196636c5aa59ff1792 Mon Sep 17 00:00:00 2001 From: Steve Paltridge Date: Fri, 24 Apr 2026 03:05:56 -0600 Subject: [PATCH 1/4] docs: SDK launch polish + examples folder - README: PyPI + npm badges, multi-stack quickstart (curl + pip + npm) using real X-API-Key + string tags - examples/python/agent_memory.py: end-to-end remember/recall/checkpoint, validated against ghcr.io/recallworks/recall:0.1.0 - examples/typescript/agent_memory.ts: same flow, async; validated end-to-end - examples/bash/curl_round_trip.sh: SDK-free raw HTTP - examples/README.md: index --- README.md | 31 ++++++++++++++++-- examples/README.md | 34 ++++++++++++++++++++ examples/bash/curl_round_trip.sh | 29 +++++++++++++++++ examples/python/agent_memory.py | 49 +++++++++++++++++++++++++++++ examples/typescript/agent_memory.ts | 46 +++++++++++++++++++++++++++ 5 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 examples/README.md create mode 100644 examples/bash/curl_round_trip.sh create mode 100644 examples/python/agent_memory.py create mode 100644 examples/typescript/agent_memory.ts diff --git a/README.md b/README.md index 856eec0..a2d4601 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ [![Tests](https://github.com/RecallWorks/Recall/actions/workflows/test.yml/badge.svg)](https://github.com/RecallWorks/Recall/actions/workflows/test.yml) [![Docker](https://github.com/RecallWorks/Recall/actions/workflows/docker.yml/badge.svg)](https://github.com/RecallWorks/Recall/actions/workflows/docker.yml) +[![PyPI](https://img.shields.io/pypi/v/recall-client?label=pypi%3A%20recall-client&logo=pypi&logoColor=white)](https://pypi.org/project/recall-client/) +[![npm](https://img.shields.io/npm/v/@recallworks/recall-client?label=npm%3A%20%40recallworks%2Frecall-client&logo=npm&logoColor=white)](https://www.npmjs.com/package/@recallworks/recall-client) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) [![Python 
3.11+](https://img.shields.io/badge/python-3.11%2B-blue?logo=python&logoColor=white)](pyproject.toml) [![MCP](https://img.shields.io/badge/MCP-compatible-7c3aed)](https://modelcontextprotocol.io) @@ -34,6 +36,8 @@ ## Five-minute install +**1. Run the server:** + ```bash docker run -d --name recall \ -p 8787:8787 \ @@ -42,13 +46,36 @@ docker run -d --name recall \ ghcr.io/recallworks/recall:latest ``` +**2. Talk to it — pick your stack:** + ```bash -curl -H "Authorization: Bearer changeme" \ +# Raw HTTP (any language) +curl -H "X-API-Key: changeme" \ -H "Content-Type: application/json" \ - -d '{"content":"first memory","tags":["hello"]}' \ + -d '{"content":"first memory","tags":"hello"}' \ http://localhost:8787/tool/remember ``` +```python +# Python +pip install recall-client + +from recall_client import RecallClient +with RecallClient("http://localhost:8787", api_key="changeme") as c: + c.remember("first memory", tags="hello") + print(c.recall("memory").result) +``` + +```ts +// TypeScript / JavaScript (Node 18+, Bun, Deno, browser) +npm install @recallworks/recall-client + +import { RecallClient } from "@recallworks/recall-client"; +const c = new RecallClient({ baseUrl: "http://localhost:8787", apiKey: "changeme" }); +await c.remember("first memory", { tags: "hello" }); +console.log((await c.recall("memory")).result); +``` + Full walkthrough: [docs/quickstart.md](docs/quickstart.md). --- diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..0896c5d --- /dev/null +++ b/examples/README.md @@ -0,0 +1,34 @@ +# Recall examples + +Tiny end-to-end programs you can copy-paste. 
Each one assumes a Recall server running locally: + +```bash +docker run -d --name recall -p 8787:8787 -e API_KEY=changeme \ + -v recall-data:/data ghcr.io/recallworks/recall:latest +``` + +| Example | Stack | What it shows | +|---|---|---| +| [`python/agent_memory.py`](python/agent_memory.py) | Python 3.11+ | Remember session facts, recall by query, checkpoint at the end | +| [`typescript/agent_memory.ts`](typescript/agent_memory.ts) | Node 18+ / Bun / Deno | Same flow, async client | +| [`bash/curl_round_trip.sh`](bash/curl_round_trip.sh) | curl | Raw HTTP — no SDK needed | + +## Run a Python example + +```bash +pip install recall-client +python examples/python/agent_memory.py +``` + +## Run a TypeScript example + +```bash +npm install @recallworks/recall-client +npx tsx examples/typescript/agent_memory.ts +``` + +## Run the curl example + +```bash +bash examples/bash/curl_round_trip.sh +``` diff --git a/examples/bash/curl_round_trip.sh b/examples/bash/curl_round_trip.sh new file mode 100644 index 0000000..c0cc444 --- /dev/null +++ b/examples/bash/curl_round_trip.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# @wbx-modified copilot-a3f7·MTN | 2026-04-24 | raw HTTP round trip — no SDK +# +# Run a server first: +# docker run -d -p 8787:8787 -e API_KEY=changeme \ +# -v recall-data:/data ghcr.io/recallworks/recall:latest + +set -euo pipefail + +URL="${RECALL_URL:-http://localhost:8787}" +KEY="${RECALL_KEY:-changeme}" + +echo "== health ==" +curl -s "$URL/health" +echo + +echo "== remember ==" +curl -s -X POST "$URL/tool/remember" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"content":"the project deadline is 2026-05-15","source":"project","tags":"deadline"}' +echo + +echo "== recall ==" +curl -s -X POST "$URL/tool/recall" \ + -H "X-API-Key: $KEY" \ + -H "Content-Type: application/json" \ + -d '{"query":"when is the deadline","n":3}' +echo diff --git a/examples/python/agent_memory.py b/examples/python/agent_memory.py new file mode 100644 index 
0000000..bca2b27 --- /dev/null +++ b/examples/python/agent_memory.py @@ -0,0 +1,49 @@ +# @wbx-modified copilot-a3f7·MTN | 2026-04-24 | minimal Recall agent-memory example +"""End-to-end agent memory: remember, recall, checkpoint. + +Run a server first: + docker run -d -p 8787:8787 -e API_KEY=changeme \ + -v recall-data:/data ghcr.io/recallworks/recall:latest + +Then: + pip install recall-client + python agent_memory.py +""" +from __future__ import annotations + +import os + +from recall_client import RecallClient + +URL = os.environ.get("RECALL_URL", "http://localhost:8787") +KEY = os.environ.get("RECALL_KEY", "changeme") + + +def main() -> None: + with RecallClient(URL, api_key=KEY) as c: + # 0. Health + print("server:", c.health()) + + # 1. Store a few facts the agent should remember across sessions. + c.remember("user prefers dark mode", source="prefs", tags="ui,dark-mode") + c.remember("project deadline is 2026-05-15", source="project", tags="deadline") + c.remember("lead engineer is Jamie (jamie@example.com)", source="people", tags="contact") + + # 2. Pull them back semantically. + hits = c.recall("when is the deadline", n=3) + print("\nrecall: when is the deadline") + print(hits.result) + + # 3. End-of-session checkpoint so the next agent can pick up. 
+ cp = c.checkpoint( + intent="onboard new agent to the project", + established="user prefers dark mode; deadline 2026-05-15; lead is Jamie", + pursuing="prepare kickoff doc draft", + open_questions="which team channel does Jamie use?", + session="e0a1", + ) + print("\ncheckpoint:", cp.result.splitlines()[0]) + + +if __name__ == "__main__": + main() diff --git a/examples/typescript/agent_memory.ts b/examples/typescript/agent_memory.ts new file mode 100644 index 0000000..e1cde7f --- /dev/null +++ b/examples/typescript/agent_memory.ts @@ -0,0 +1,46 @@ +// @wbx-modified copilot-a3f7·MTN | 2026-04-24 | minimal Recall agent-memory example (TS) +// +// Run a server first: +// docker run -d -p 8787:8787 -e API_KEY=changeme \ +// -v recall-data:/data ghcr.io/recallworks/recall:latest +// +// Then: +// npm install @recallworks/recall-client +// npx tsx agent_memory.ts + +import { RecallClient } from "@recallworks/recall-client"; + +const URL = process.env.RECALL_URL ?? "http://localhost:8787"; +const KEY = process.env.RECALL_KEY ?? "changeme"; + +async function main() { + const c = new RecallClient({ baseUrl: URL, apiKey: KEY }); + + // 0. Health + console.log("server:", await c.health()); + + // 1. Store a few facts the agent should remember across sessions. + await c.remember("user prefers dark mode", { source: "prefs", tags: "ui,dark-mode" }); + await c.remember("project deadline is 2026-05-15", { source: "project", tags: "deadline" }); + await c.remember("lead engineer is Jamie (jamie@example.com)", { source: "people", tags: "contact" }); + + // 2. Pull them back semantically. + const hits = await c.recall("when is the deadline", { n: 3 }); + console.log("\nrecall: when is the deadline"); + console.log(hits.result); + + // 3. End-of-session checkpoint so the next agent can pick up. 
+ const cp = await c.checkpoint({ + intent: "onboard new agent to the project", + established: "user prefers dark mode; deadline 2026-05-15; lead is Jamie", + pursuing: "prepare kickoff doc draft", + openQuestions: "which team channel does Jamie use?", + session: "e0a1", + }); + console.log("\ncheckpoint:", cp.result.split("\n")[0]); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); From 3f6126bc5db5ea27dcb0083ce7386909bc25bf9d Mon Sep 17 00:00:00 2001 From: Steve Paltridge Date: Mon, 27 Apr 2026 22:51:23 -0600 Subject: [PATCH 2/4] =?UTF-8?q?feat:=200.3.3=20=E2=80=94=20MCP=20stdio=20e?= =?UTF-8?q?ntry,=20/sse=20+=20/mcp=20transports,=20Voyage=20embedder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New stdio MCP entry point (recall-mcp console script) for Claude Desktop/Cursor/Cline/Continue.dev - Mount FastMCP /sse + /messages/ + /mcp routes alongside HTTP API in app.py - Add VoyageEmbedder backend (parity with marketing claims) - Add recall_filtered, answer, backfill_epoch tools - Bump Docker image to install [llm,mcp] extras - Add docs/mcp.md integration guide - Add docs/{gated-workstreams,license-boundary,outside-counsel-checklist}.md - Add TypeScript examples scaffold - Add OSS->enterprise import isolation test - Apply trade mark to README H1 82/82 tests pass. 
Live image: ghcr.io/recallworks/recall:0.3.3 --- .gitignore | 4 + README.md | 4 +- docker/single-tenant/Dockerfile | 12 +- docs/gated-workstreams.md | 34 ++ docs/license-boundary.md | 68 ++++ docs/mcp.md | 160 +++++++++ docs/outside-counsel-checklist.md | 80 +++++ examples/README.md | 10 + examples/typescript/.gitignore | 3 + examples/typescript/package.json | 22 ++ examples/typescript/tsconfig.json | 14 + pyproject.toml | 9 +- src/recall/__init__.py | 2 +- src/recall/app.py | 35 +- src/recall/chunking.py | 3 + src/recall/embedder.py | 53 ++- src/recall/mcp_stdio.py | 119 +++++++ src/recall/store.py | 18 ++ src/recall/tools/__init__.py | 9 +- src/recall/tools/answer.py | 262 +++++++++++++++ src/recall/tools/backfill.py | 74 +++++ src/recall/tools/checkpoint.py | 2 + src/recall/tools/recall.py | 115 ++++--- src/recall/tools/recall_filtered.py | 328 +++++++++++++++++++ src/recall/tools/reflect.py | 4 + src/recall/tools/remember.py | 2 + src/recall/transport/http.py | 20 +- tests/fakestore.py | 59 +++- tests/test_no_oss_to_enterprise_imports.py | 28 ++ tests/unit/test_answer.py | 124 +++++++ tests/unit/test_imports.py | 9 +- tests/unit/test_recall_filtered.py | 357 +++++++++++++++++++++ 32 files changed, 1982 insertions(+), 61 deletions(-) create mode 100644 docs/gated-workstreams.md create mode 100644 docs/license-boundary.md create mode 100644 docs/mcp.md create mode 100644 docs/outside-counsel-checklist.md create mode 100644 examples/typescript/.gitignore create mode 100644 examples/typescript/package.json create mode 100644 examples/typescript/tsconfig.json create mode 100644 src/recall/mcp_stdio.py create mode 100644 src/recall/tools/answer.py create mode 100644 src/recall/tools/backfill.py create mode 100644 src/recall/tools/recall_filtered.py create mode 100644 tests/test_no_oss_to_enterprise_imports.py create mode 100644 tests/unit/test_answer.py create mode 100644 tests/unit/test_recall_filtered.py diff --git a/.gitignore b/.gitignore index 8cdee68..989d74b 
100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,7 @@ __pycache__/ # Local secrets — NEVER commit .tokens.local + +# counsel pre-brief packets (internal-only) +_counsel-packet/ + diff --git a/README.md b/README.md index a2d4601..cd3017e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ - +
-# Recall +# Recall™ **Open-source memory for AI agents. MCP-native. Self-hosted. One Docker image.** diff --git a/docker/single-tenant/Dockerfile b/docker/single-tenant/Dockerfile index 7c33eda..3a933e5 100644 --- a/docker/single-tenant/Dockerfile +++ b/docker/single-tenant/Dockerfile @@ -1,4 +1,4 @@ -# @wbx-modified copilot-c4a1·MTN | 2026-04-23 | single-tenant Dockerfile | prev: NEW +# @wbx-modified copilot-b1c4 | 2026-04-27 23:36 MTN | v0.3.3 | install [llm,mcp] so SSE + recall-mcp work | prev: copilot-a3f7@2026-04-26 # syntax=docker/dockerfile:1.6 # # Recall — single-tenant agent-memory MCP server. @@ -6,7 +6,10 @@ # Run: docker run -p 8787:8787 -e API_KEY=secret -v recall-data:/data recall:latest # Health: curl http://localhost:8787/health -FROM python:3.14-slim AS base +# Pinned to 3.12-slim: widest wheel coverage (chromadb, onnxruntime, etc.). +# pyproject requires-python >=3.11, so 3.12 is in-band. Avoid 3.14 until upstream +# wheel matrix catches up (was the cause of intermittent GHA build failures). +FROM python:3.12-slim AS base # Install git for optional GIT_REPO_URL sync. Stay minimal otherwise. RUN apt-get update \ @@ -19,10 +22,11 @@ RUN useradd --create-home --uid 10001 --shell /bin/bash recall WORKDIR /app # ---- Dependency layer (cached when src/ changes but pyproject doesn't) ---- +# Pin pip and use --retries to ride through transient PyPI/index hiccups. COPY pyproject.toml README.md ./ COPY src ./src -RUN pip install --no-cache-dir --upgrade pip \ - && pip install --no-cache-dir . +RUN pip install --no-cache-dir --upgrade "pip==24.2" \ + && pip install --no-cache-dir --retries 5 --timeout 60 ".[llm,mcp]" # ---- Runtime layout ---- # Ephemeral local store (ChromaDB SQLite — must NOT be on a network share). 
diff --git a/docs/gated-workstreams.md b/docs/gated-workstreams.md new file mode 100644 index 0000000..f257813 --- /dev/null +++ b/docs/gated-workstreams.md @@ -0,0 +1,34 @@ + +# Gated Workstreams — Next Actions + +Both items below are referenced in [`/memories/05-audit-trail.md`](file:///c%3A/Users/StevePaltridge/AppData/Roaming/Code/User/globalStorage/github.copilot-chat/memory/05-audit-trail.md) +as "Steve-gated". Agent has done **zero work** on either; this file just +captures the scope so picking one up later is fast. + +## SettingsPilot batch-9 + +- **Where**: `C:\Dev\EncompassSettingsPlugin\schema-extract\generate-batch9.ps1` (to be created); recipes land in `C:\Dev\EncompassSettingsPlugin\_pending-batch9\`. +- **Predecessor**: batch-8 (per memory `S-20260422-batch8`) shipped 50 read-only recipes targeting uncovered `ConfigManager`/`OrgManager`/`BpmManager`/`ServerManager` methods using probe v3.0 reflection dumps. +- **Inputs ready**: `_probe-iconfig-methods.json`, `_probe-ibpm-methods.json`, `_probe-iorg-methods.json`, `_probe-iserver-methods.json` (1020 manager method signatures total). +- **Verb-allowlist guard**: must filter to read-only verbs (`Get|List|Read|Find|Query|Count|Has|Is|Can|Calculate`). Batch-8 blocked 436 mutating-verb method calls. +- **Coverage target**: get from current 226/323 deep-probe keys (70%) to 90% — needs ~65 new probe-eligible methods after dedup vs batch-8. +- **Validation path**: ICE Encompass FormBuilder → Tools → Package Import Wizard → install .empkg → form reopen → export results JSON → diff vs prior batch. +- **Blocker**: batch-9 needs PluginAudit ClientContext blocker resolved first per memory `S-20260422-pluginaudit` ("Server.dll statics") — otherwise probe v3.0 is the ceiling. +- **Owner gate**: Steve confirms (a) priority vs other lender outreach, (b) acceptable to ship recipe-only update without resolving ClientContext blocker (i.e. ship at 80% coverage). 
+ +## IceWhisperer Edge variant + +- **Where**: New `IceWhisperer/extension/edge/` sibling to existing `IceWhisperer/extension/` (Chrome MV3). +- **Status**: Existing extension is Chrome MV3 with Firefox compat shipped this session (per prior batch B+C+D+E+F summary in conversation history — `extension/firefox/` exists). +- **Edge specifics**: + 1. Edge is Chromium-based — most MV3 manifest is identical. + 2. Differs in: extension store submission (Microsoft Partner Center, not CWS), `update_url` field in manifest, optional `browser_specific_settings` for legacy compat. + 3. No code changes likely needed in `background.js`/`content.js`/`popup.*`. Just a separate manifest + store listing. +- **Deliverable**: `extension/edge/manifest.json` + `extension/edge/README.md` describing Microsoft Partner Center submission flow + screenshots. +- **Owner gate**: Steve confirms whether Edge is worth the dual-listing maintenance burden vs telling Edge users "install from Chrome Web Store" (Edge supports CWS extensions natively since 2020). + +## Items NOT in this list (deliberately) + +- **Channel-brain endpoint deploy** — Rule 4 forbids deploy without explicit per-deploy approval. Steve must say "deploy" before any `azd up`/`func publish`/`docker push`. +- **Real Chrome load test** — needs interactive browser; agent cannot drive. +- **Outside-counsel call** — see [`outside-counsel-checklist.md`](./outside-counsel-checklist.md). Steve schedules. diff --git a/docs/license-boundary.md b/docs/license-boundary.md new file mode 100644 index 0000000..51b8389 --- /dev/null +++ b/docs/license-boundary.md @@ -0,0 +1,68 @@ + +# Recall License Boundary + +> One-pager for contributors and customers. The legal text is in +> [`LICENSE`](./LICENSE) (MIT) and [`LICENSE-COMMERCIAL.md`](./LICENSE-COMMERCIAL.md) (BSL 1.1). 
+ +## TL;DR + +| Tree | License | What it covers | Production use | +|------|---------|----------------|----------------| +| `src/recall/` | MIT | Single-tenant memory engine, tools, transports, CLI, store interfaces, the OSS Docker image | Free, forever, any scale | +| `clients/` | MIT | Python + TypeScript SDKs (`recall-client`, `@recallworks/recall-client`) | Free, forever | +| `docker/single-tenant/` | MIT | The image at `ghcr.io/recallworks/recall:0.1.0` | Free, forever | +| `docs/`, `examples/`, `tests/` | MIT | Quickstarts, conventions, conformance tests | Free, forever | +| `enterprise/` | BSL 1.1 | Multi-tenant isolation, SSO connectors, hash-chain audit log, managed-cloud control plane | Free for non-prod and ≤5-seat single-org prod; commercial license for larger; converts to MIT 3 years after each tagged release | + +## Why two licenses + +The OSS core stays MIT so anyone can run it on their own boxes for any +purpose, commercially or otherwise, without a conversation with us. That's +the deal: own your memory, on your hardware, forever. + +The `enterprise/` tree adds the things a hosted competitor would need to +spin up a paid clone overnight: per-tenant isolation, SSO, audit-grade +exports, control plane. We license that under BSL 1.1 — free for small +production and any non-production use, paid only when you're at scale, and +auto-converted to MIT three years after each release. + +## Hard rule: dependency direction + +> **`src/recall/*` MUST NOT import from `enterprise/*`.** +> The reverse is fine. + +The CI test [`tests/test_no_oss_to_enterprise_imports.py`](./tests/test_no_oss_to_enterprise_imports.py) +walks every `.py` under `src/recall/` and fails the build if it finds a +`from enterprise.*` or `import enterprise.*` line. + +If MIT code took a runtime dependency on BSL code, the boundary would be +unilateral: an OSS user could no longer run the OSS core without pulling in +BSL terms. 
Refuse the dep instead — design the OSS core's interface to be +satisfied by either side. + +## How to build a feature + +Ask: "would a hosted-Recall competitor copy this on day one?" + +* **No** → MIT, lands in `src/recall/`. Examples: a new tool, a new + embedder backend, a CLI flag, an SDK helper. +* **Yes** → BSL, lands in `enterprise/`. Examples: a new SSO connector, + multi-tenant ABAC, a managed-cloud quota service, an audit exporter. + +When you're not sure, default to MIT and move it later. It's easier to +relicense towards proprietary than away from it. + +## Contributing under each license + +Both trees accept external contributions. The CLA + DCO requirements in +[`CONTRIBUTING.md`](./CONTRIBUTING.md) apply uniformly. By contributing to +`enterprise/` you are accepting that your contribution lands under BSL 1.1 +and converts to MIT on the same 3-year clock as the rest of the tree. + +## Buying a commercial license + +If your production deployment exceeds the BSL Additional Use Grant +(more than 5 seats per organization), reach out via the contact in +[`README.md`](./README.md). Commercial licenses are flat-rate per +deployment and include the SOC 2 evidence pack pulled from +`HashChainAuditLog`. diff --git a/docs/mcp.md b/docs/mcp.md new file mode 100644 index 0000000..426476d --- /dev/null +++ b/docs/mcp.md @@ -0,0 +1,160 @@ +# @wbx-modified copilot-b1c4 | 2026-04-27 22:25 MTN | v1.0 | MCP integration guide for Claude Desktop / Cursor / Cline | prev: NEW +# Recall as an MCP server + +Recall ships with first-class support for the [Model Context Protocol](https://modelcontextprotocol.io/), +so you can wire it into Claude Desktop, Cursor, Cline, Continue.dev, or any +other MCP client as a memory tool. Your AI assistant gets persistent memory +across sessions — locally, on your machine, no SaaS in the loop. 
+ +## Two transports, same 16 tools + +Recall exposes the same tool surface over two MCP transports: + +| Transport | Use when | Entry point | +|-----------|---------------------------------------------------------|-------------| +| **stdio** | Running locally, used by Claude Desktop / Cursor / Cline | `recall-mcp` (or `python -m recall.mcp_stdio`) | +| **SSE** | Running as a network service, multiple agents share it | `recall-server` exposes `/sse` automatically when `mcp` extras installed | + +Both serve all 16 tools: `recall`, `recall_filtered`, `answer`, `remember`, +`reindex`, `index_file`, `memory_stats`, `forget`, `reflect`, `anti_pattern`, +`session_close`, `checkpoint`, `pulse`, `maintenance`, `snapshot_index`, +`backfill_epoch`. + +## Install + +```bash +pip install "recall[mcp]" +``` + +That installs the server, all 16 tools, and the MCP runtime (`mcp>=1.27.0`). + +## Claude Desktop + +Edit `~/Library/Application Support/Claude/claude_desktop_config.json` +(macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows): + +```json +{ + "mcpServers": { + "recall": { + "command": "recall-mcp", + "env": { + "RECALL_STORE_DIR": "/Users/you/.recall/store", + "RECALL_LOG_LEVEL": "WARNING" + } + } + } +} +``` + +Restart Claude Desktop. You'll see a new wrench icon — click it and you'll +see all 16 Recall tools available to the conversation. + +## Cursor + +Cursor reads MCP servers from `~/.cursor/mcp.json`: + +```json +{ + "mcpServers": { + "recall": { + "command": "recall-mcp", + "env": { + "RECALL_STORE_DIR": "/Users/you/.recall/store" + } + } + } +} +``` + +After Cursor restart, the tools show up in Composer. 
+ +## Cline (VS Code extension) + +In VS Code: open Cline settings → "MCP Servers" → click "+": + +```json +{ + "recall": { + "command": "recall-mcp", + "args": [], + "env": { + "RECALL_STORE_DIR": "/Users/you/.recall/store" + } + } +} +``` + +## Continue.dev + +In `~/.continue/config.json`: + +```json +{ + "mcpServers": [ + { + "name": "recall", + "command": "recall-mcp", + "env": { + "RECALL_STORE_DIR": "/Users/you/.recall/store" + } + } + ] +} +``` + +## Verify it's working + +After your client restarts, ask: **"What Recall tools do you have access to?"** + +A working setup will list all 16 tools. Then ask: + +> *"Remember that I prefer concise answers."* + +The model should call `remember`. In the next session, ask: + +> *"How do I like my answers?"* + +The model should call `recall` and answer "concise." + +## Bring your own embedding model + +By default Recall uses a small offline embedding model bundled with chroma — +this runs on a laptop with no API keys. + +To use a stronger model, add to your client's `env`: + +```json +"env": { + "RECALL_EMBEDDER": "openai", + "OPENAI_API_KEY": "sk-...", + "RECALL_STORE_DIR": "/Users/you/.recall/store" +} +``` + +## Privacy posture + +- **Stdio mode is fully local.** The MCP client launches `recall-mcp` as a + subprocess on your machine. No network calls unless you opted into a + cloud embedder. +- **No telemetry. Ever.** Recall doesn't phone home about tool calls, + tool counts, or anything else. Read [`auth.py`](../src/recall/auth.py) + and [`app.py`](../src/recall/app.py) — the only network code is the + optional git_sync if you set `RECALL_GIT_REPO_URL`. +- **Store path is yours.** Set `RECALL_STORE_DIR` to anywhere you control. + Default is `~/.recall/store/`. 
+ +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---------|--------------|-----| +| Client shows 0 tools | `pip install "recall[mcp]"` not run | Install with the `mcp` extra | +| `recall-mcp: command not found` | Not in client's PATH | Use absolute path: `"command": "/Users/you/.venv/bin/recall-mcp"` | +| Tools list but calls fail | Store init failed at startup | Check stderr — likely a permissions issue on `RECALL_STORE_DIR` | +| Stdout corruption / parse errors | Something logged to stdout | Recall logs to stderr only — if you wrote a custom plugin, fix it | + +## Source + +- [`src/recall/mcp_stdio.py`](../src/recall/mcp_stdio.py) — stdio entry +- [`src/recall/transport/mcp_sse.py`](../src/recall/transport/mcp_sse.py) — FastMCP wiring +- [`src/recall/tools/__init__.py`](../src/recall/tools/__init__.py) — tool registry diff --git a/docs/outside-counsel-checklist.md b/docs/outside-counsel-checklist.md new file mode 100644 index 0000000..31e55d3 --- /dev/null +++ b/docs/outside-counsel-checklist.md @@ -0,0 +1,80 @@ + +# Outside-Counsel Review — Pre-Brief Checklist + +> Internal-only. Not committed in any release zip. Used to brief outside +> counsel before they look at Recall's license stack and IceWhisperer's +> commercial posture. + +## What we want counsel to confirm + +1. **MIT/BSL boundary holds.** Confirm `src/recall/*` (MIT) takes no runtime + dep on `enterprise/*` (BSL 1.1), and that the CI test + [`tests/test_no_oss_to_enterprise_imports.py`](../tests/test_no_oss_to_enterprise_imports.py) + is the right enforcement mechanism. +2. **BSL 1.1 Additional Use Grant** as drafted in + [`LICENSE-COMMERCIAL.md`](../LICENSE-COMMERCIAL.md) is enforceable and + matches the intent: free for non-prod and ≤5-seat single-org prod, paid + above that, auto-converts to MIT 3 years after each tagged release. +3. **License-boundary one-pager** at [`docs/license-boundary.md`](./license-boundary.md) + accurately summarises the legal stack for a commercial buyer. +4. 
**IceWhisperer EULA** at `IceWhisperer/bundle/EULA.md` is enforceable in + the United States (Steve's first 5 ICPs are US lenders). Confirm the + binding-arbitration clause survives state-law variance. +5. **ICE non-affiliation disclosure** on the IceWhisperer landing + pricing + pages is sufficient to defeat any trademark-confusion claim by ICE + Mortgage Technology. +6. **Corpus posture (v1.0.2)** described in + `/memories/icewhisperer-corpus-legal.md` v1.2: SDK XML + Developer + Connect + Resource Center articles authored by ICE, scraped under + Steve's senior-ICE standing, redistributed inside customer perimeter + only. Confirm this is defensible. + +## Hard rule for counsel + +> **Counsel may quote any text in this file or in `docs/license-boundary.md` +> directly back at us.** Do not assume any of this is privileged advice +> until they say so in writing. + +## Pre-brief packet (assemble before the call) + +- [ ] [`LICENSE`](../LICENSE) (MIT) +- [ ] [`LICENSE-COMMERCIAL.md`](../LICENSE-COMMERCIAL.md) (BSL 1.1 + grant) +- [ ] [`docs/license-boundary.md`](./license-boundary.md) +- [ ] [`tests/test_no_oss_to_enterprise_imports.py`](../tests/test_no_oss_to_enterprise_imports.py) +- [ ] `IceWhisperer/bundle/EULA.md` +- [ ] `IceWhisperer/_strategy/non-affiliation.md` (if it exists; otherwise + the disclosure block from `pricing.html`) +- [ ] `/memories/icewhisperer-corpus-legal.md` v1.2 — printed/exported +- [ ] One-page founder summary (Steve to write 5 lines) + +## Logistics + +- Engage at least 2 weeks before first paid Team customer onboards + (current pilot is 60-day pre-paid, no Team tier active). +- Counsel must specialise in **OSS licensing AND fintech vendor contracts**. + Not just one of the two. Likely candidate firms: Heather Meeker + (license + OSS), Outside GC (fintech vendor contracts). +- Budget envelope: typically $5-15k for a 4-hour review of this scope. + +## What we are NOT asking counsel to do + +- Re-draft any of these documents. 
We will iterate based on red-line. +- Review individual customer contracts. Each Team contract uses a stock + MSA + Order Form template that needs its own review pass. +- Opine on patent posture. Recall + IceWhisperer file no patents. + +## Open questions for counsel + +1. Does the BSL Additional Use Grant need a per-deployment seat-counter + audit clause to be enforceable? Or does honor-system + telemetry suffice? +2. Is "powered by Recall" co-branding sufficient to satisfy MIT attribution + in the IceWhisperer bundle? The bundle re-distributes the Recall MIT + image — the MIT attribution lives in `bundle/THIRD-PARTY-NOTICES.md`. +3. Is the Resource Center scrape (ICE-authored articles, partner login + under Steve's name) safer characterised as "fair use" or "agency" + under our reseller agreement? We've documented as agency in + `/memories/icewhisperer-corpus-legal.md` v1.2 — confirm. + +## Owner + +Steve Paltridge. Schedule via Calendly; no agent action. diff --git a/examples/README.md b/examples/README.md index 0896c5d..42ff2b6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -22,6 +22,16 @@ python examples/python/agent_memory.py ## Run a TypeScript example +Zero-config (uses local `package.json`): + +```bash +cd examples/typescript +npm install +npm start +``` + +Or one-liner without installing: + ```bash npm install @recallworks/recall-client npx tsx examples/typescript/agent_memory.ts diff --git a/examples/typescript/.gitignore b/examples/typescript/.gitignore new file mode 100644 index 0000000..2e6fae9 --- /dev/null +++ b/examples/typescript/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +package-lock.json +*.log diff --git a/examples/typescript/package.json b/examples/typescript/package.json new file mode 100644 index 0000000..1932175 --- /dev/null +++ b/examples/typescript/package.json @@ -0,0 +1,22 @@ +{ + "name": "recall-typescript-example", + "version": "0.1.0", + "private": true, + "description": "Minimal Recall agent-memory example 
(TypeScript). Run a Recall server first, then `npm install && npm start`.", + "type": "module", + "scripts": { + "start": "tsx agent_memory.ts", + "build": "tsc --noEmit" + }, + "dependencies": { + "@recallworks/recall-client": "^0.2.0" + }, + "devDependencies": { + "tsx": "^4.19.0", + "typescript": "^5.6.0" + }, + "engines": { + "node": ">=18" + }, + "license": "MIT" +} diff --git a/examples/typescript/tsconfig.json b/examples/typescript/tsconfig.json new file mode 100644 index 0000000..21f571c --- /dev/null +++ b/examples/typescript/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "Bundler", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "resolveJsonModule": true, + "forceConsistentCasingInFileNames": true, + "noEmit": true + }, + "include": ["*.ts"] +} diff --git a/pyproject.toml b/pyproject.toml index 68e2672..c5d9a8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,11 @@ -# @wbx-modified copilot-c4a1·MTN | 2026-04-23 | Recall Wk1 Day 1 — pyproject for src/ layout | prev: NEW +# @wbx-modified copilot-b1c4 | 2026-04-27 23:35 MTN | v0.3.3 | bump for MCP stdio + SSE wiring + Voyage embedder | prev: copilot-a3f7@2026-04-26 [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] name = "recall" -version = "0.1.0" +version = "0.3.3" description = "Open-source agent-memory MCP server. 
recall.works" readme = "README.md" requires-python = ">=3.11" @@ -28,11 +28,13 @@ dependencies = [ [project.optional-dependencies] mcp = ["mcp>=1.27.0"] +llm = ["openai>=1.40"] test = ["pytest>=9.0.3", "pytest-asyncio>=1.3.0", "httpx>=0.28.1"] -dev = ["recall[mcp,test]", "ruff>=0.5", "mypy>=1.20.2"] +dev = ["recall[mcp,llm,test]", "ruff>=0.5", "mypy>=1.20.2"] [project.scripts] recall-server = "recall.app:main" +recall-mcp = "recall.mcp_stdio:main" [project.urls] Homepage = "https://recall.works" @@ -43,6 +45,7 @@ packages = ["src/recall"] [tool.pytest.ini_options] testpaths = ["tests"] +pythonpath = ["src", "."] addopts = "-q" asyncio_mode = "auto" diff --git a/src/recall/__init__.py b/src/recall/__init__.py index 77939b3..7b3718d 100644 --- a/src/recall/__init__.py +++ b/src/recall/__init__.py @@ -6,4 +6,4 @@ from recall.config import Config """ -__version__ = "0.1.0" +__version__ = "0.3.2" diff --git a/src/recall/app.py b/src/recall/app.py index c062fe2..9126c74 100644 --- a/src/recall/app.py +++ b/src/recall/app.py @@ -1,12 +1,14 @@ -# @wbx-modified copilot-c4a1·MTN | 2026-04-23 | app entry — composes config + store + transports | prev: NEW +# @wbx-modified copilot-b1c4 | 2026-04-27 23:30 MTN | v0.3.3 | mount MCP /sse + /mcp routes | prev: copilot-c4a1@2026-04-23 """Application entry point. Builds a Starlette app with: - ApiKeyAuthMiddleware - GET /health - POST /tool/{name} - - MCP/SSE routes (mounted from FastMCP) + - GET /sse (MCP Server-Sent Events transport, optional) + - POST /mcp (MCP Streamable HTTP transport, optional) +The MCP routes are only mounted if the `mcp` extras are installed. Background thread initializes ChromaDB + optional git sync + initial index. 
""" @@ -29,6 +31,7 @@ from .tools import checkpoint as _checkpoint_mod from .tools import maintenance as _maintenance_mod from .tools import recall as _recall_mod +from .tools import recall_filtered as _recall_filtered_mod from .tools import reflect as _reflect_mod from .tools import reindex as _reindex_mod from .tools import remember as _remember_mod @@ -42,6 +45,7 @@ def _propagate_config(cfg: Config) -> None: """Push the active Config into every tool module's lazy default.""" for mod in ( _recall_mod, + _recall_filtered_mod, _remember_mod, _reindex_mod, _stats_mod, @@ -109,6 +113,33 @@ def build_app(cfg: Config | None = None, *, start_background: bool = True) -> St Route("/health", health_handler, methods=["GET"]), Route("/tool/{name}", tool_handler, methods=["POST"]), ] + + # Optionally mount FastMCP's SSE + Streamable-HTTP transports so MCP + # clients (Claude Desktop SSE mode, custom agents) can talk to the same + # 16 tools that HTTP /tool/{name} exposes. Lazy import: a Recall install + # without the `mcp` extra simply omits these routes. + # + # FastMCP's sse_app() already exposes /sse + /messages/ at its root, and + # streamable_http_app() exposes /mcp. We therefore mount at "/" and pick + # the SSE app's routes (its built-in dispatcher handles /messages/ correctly). 
+ try: + from .transport.mcp_sse import build_mcp_server + + _mcp = build_mcp_server(name="recall") + _sse_app = _mcp.sse_app() + _http_app = _mcp.streamable_http_app() + # Hoist the SSE app's routes (/sse + /messages/) onto our app + for _r in _sse_app.routes: + routes.append(_r) + # Hoist the streamable-HTTP /mcp route + for _r in _http_app.routes: + routes.append(_r) + log.info("MCP transports mounted at /sse, /messages/, and /mcp") + except ImportError: + log.info("mcp package not installed; SSE/Streamable-HTTP transports disabled") + except Exception: + log.exception("Failed to mount MCP transports (non-fatal)") + app = Starlette(routes=routes) app.state.config = cfg app.add_middleware(ApiKeyAuthMiddleware, api_keys=cfg.api_keys) diff --git a/src/recall/chunking.py b/src/recall/chunking.py index 134b00d..f99503c 100644 --- a/src/recall/chunking.py +++ b/src/recall/chunking.py @@ -4,6 +4,7 @@ from __future__ import annotations import hashlib +import time from datetime import datetime from .store import Store @@ -36,6 +37,7 @@ def index_file(store: Store, filepath: str, chunk_size: int, chunk_overlap: int) chunks = chunk_text(content, filepath, chunk_size, chunk_overlap) batch_size = 40 indexed_at = datetime.now().isoformat() + indexed_at_epoch = time.time() for i in range(0, len(chunks), batch_size): batch = chunks[i : i + batch_size] store.upsert( @@ -46,6 +48,7 @@ def index_file(store: Store, filepath: str, chunk_size: int, chunk_overlap: int) "source": c["source"], "chunk_index": c["chunk_index"], "indexed_at": indexed_at, + "indexed_at_epoch": indexed_at_epoch, } for c in batch ], diff --git a/src/recall/embedder.py b/src/recall/embedder.py index ecd1f02..82ec8de 100644 --- a/src/recall/embedder.py +++ b/src/recall/embedder.py @@ -1,13 +1,14 @@ -# @wbx-modified copilot-a3f7·MTN | 2026-04-23 | Wk2: BYO-embedder seam (default/openai/ollama) | prev: NEW +# @wbx-modified copilot-b1c4 | 2026-04-27 23:25 MTN | v1.1 | added Voyage adapter (claim parity w/ site) | 
class VoyageEmbedder:
    """Voyage AI embedder (https://docs.voyageai.com/).

    Higher retrieval quality than the OpenAI small models on most benchmarks.
    Requires VOYAGE_API_KEY (or RECALL_EMBED_API_KEY).
    """

    name = "voyage"

    def __init__(self, model: str, api_key: str) -> None:
        # httpx is optional; surface an actionable RuntimeError now rather
        # than an ImportError deep inside the first embed() call.
        try:
            import httpx  # type: ignore[import-not-found]
        except ImportError as exc:
            raise RuntimeError("RECALL_EMBEDDER=voyage requires `pip install httpx`") from exc
        if not api_key:
            raise RuntimeError("RECALL_EMBEDDER=voyage requires VOYAGE_API_KEY or RECALL_EMBED_API_KEY")
        self._httpx = httpx
        self._model = model
        self._api_key = api_key
        log.info("VoyageEmbedder ready. model=%s", model)

    def embed(self, texts: list[str]) -> list[list[float]]:
        """POST the batch to Voyage's embeddings endpoint; return one vector per text.

        Voyage API: POST https://api.voyageai.com/v1/embeddings
        Body: {"input": [...], "model": "..."}
        Response: {"data": [{"embedding": [...]}, ...]}
        """
        payload = {"input": texts, "model": self._model}
        auth_header = {"Authorization": f"Bearer {self._api_key}"}
        with self._httpx.Client(timeout=60.0) as client:
            resp = client.post(
                "https://api.voyageai.com/v1/embeddings",
                json=payload,
                headers=auth_header,
            )
            resp.raise_for_status()
            rows = resp.json()["data"]
        return [row["embedding"] for row in rows]
def main() -> None:
    """Initialize config, embedder, summarizer, and store, then serve MCP over stdio.

    Everything is set up synchronously (unlike the HTTP server's background
    thread): stdio clients expect their very first tool call to succeed.
    """
    # stderr-only logging — stdout is the MCP JSON-RPC transport and must
    # carry nothing but protocol frames.
    logging.basicConfig(
        level=os.environ.get("RECALL_LOG_LEVEL", "WARNING").upper(),
        format="%(asctime)s [%(levelname)s recall.mcp] %(message)s",
        stream=sys.stderr,
    )
    log = logging.getLogger("recall.mcp_stdio")

    from .config import Config
    from .embedder import make_embedder_from_env
    from .store import init_store
    from .summarizer import init_summarizer, make_summarizer_from_env
    from .tools import checkpoint as _checkpoint_mod
    from .tools import maintenance as _maintenance_mod
    from .tools import recall as _recall_mod
    from .tools import recall_filtered as _recall_filtered_mod
    from .tools import reflect as _reflect_mod
    from .tools import reindex as _reindex_mod
    from .tools import remember as _remember_mod
    from .tools import stats as _stats_mod
    from .transport.mcp_sse import build_mcp_server

    cfg = Config.from_env()

    # Same config fan-out the HTTP entry point (app.py) performs.
    tool_modules = (
        _recall_mod,
        _recall_filtered_mod,
        _remember_mod,
        _reindex_mod,
        _stats_mod,
        _reflect_mod,
        _checkpoint_mod,
        _maintenance_mod,
    )
    for mod in tool_modules:
        mod.set_config(cfg)

    # Ensure the on-disk layout exists before the store touches it.
    for directory in (cfg.store_dir, cfg.artifacts_dir, cfg.repo_dir):
        os.makedirs(directory, exist_ok=True)

    # Embedder: fall back to the bundled offline model on any init failure.
    try:
        embedder = make_embedder_from_env()
    except Exception:
        log.exception("Embedder init failed; using bundled default")
        from .embedder import DefaultChromaEmbedder

        embedder = DefaultChromaEmbedder()

    # Summarizer: fall back to a no-op on any init failure.
    try:
        init_summarizer(make_summarizer_from_env())
    except Exception:
        from .summarizer import NoopSummarizer
        from .summarizer import init_summarizer as _is

        _is(NoopSummarizer())

    init_store(cfg.store_dir, cfg.collection_name, embedder=embedder)
    log.info("Recall MCP stdio ready. Store: %s", cfg.store_dir)

    # FastMCP owns stdin/stdout from here on.
    build_mcp_server(name="recall").run(transport="stdio")


if __name__ == "__main__":
    main()
_store: Store | None = None diff --git a/src/recall/tools/__init__.py b/src/recall/tools/__init__.py index fc71378..e2c346d 100644 --- a/src/recall/tools/__init__.py +++ b/src/recall/tools/__init__.py @@ -1,4 +1,4 @@ -# @wbx-modified copilot-c4a1·MTN | 2026-04-23 | Recall Wk1 Day 1 — tool registry | prev: NEW +# @wbx-modified copilot-b1c4 | 2026-04-27 19:30 MTN | v1.2 | added recall_filtered + backfill_epoch (port from server-azure v29.7) | prev: copilot-a3f7@2026-04-26 01:20 """Tool modules. Each module exposes a single public callable named after the tool, plus an optional `register(mcp)` helper used by mcp_sse transport. @@ -7,9 +7,12 @@ from __future__ import annotations +from . import answer as _answer +from . import backfill as _backfill from . import checkpoint as _checkpoint from . import maintenance as _maintenance from . import recall as _recall +from . import recall_filtered as _recall_filtered from . import reflect as _reflect from . import reindex as _reindex from . import remember as _remember @@ -17,6 +20,8 @@ TOOL_REGISTRY = { "recall": _recall.recall, + "recall_filtered": _recall_filtered.recall_filtered, + "answer": _answer.answer, "remember": _remember.remember, "reindex": _reindex.reindex, "index_file": _reindex.index_file, @@ -29,6 +34,7 @@ "pulse": _checkpoint.pulse, "maintenance": _maintenance.maintenance, "snapshot_index": _maintenance.snapshot_index, + "backfill_epoch": _backfill.backfill_epoch, } @@ -43,4 +49,5 @@ "checkpoint", "maintenance", "snapshot_index", + "backfill_epoch", } diff --git a/src/recall/tools/answer.py b/src/recall/tools/answer.py new file mode 100644 index 0000000..56ff472 --- /dev/null +++ b/src/recall/tools/answer.py @@ -0,0 +1,262 @@ +# @wbx-modified copilot-a3f7·MTN | 2026-04-26 01:15 MTN | v0.1 | synthesis tool — recall + LLM + cited answer | prev: original +"""answer — synthesize a cited answer from indexed memory. + +Pipeline: + 1. Run a semantic recall against the store (top-N chunks, default 8). + 2. 
Build a grounded prompt: question + numbered chunks + system rules. + 3. Call the configured LLM provider (default: Azure OpenAI gpt-4o-mini). + 4. Return JSON: {"answer": str, "sources": [{n, source, snippet}]}. + +Provider is a Protocol so the substrate stays vendor-neutral. Tests use +FakeLLM. Production wires AzureOpenAIProvider via env (AZURE_OPENAI_*). +The provider is selected lazily on first call so unit tests that never +invoke `answer` don't need the openai SDK installed. +""" +from __future__ import annotations + +import json +import logging +import os +import textwrap +from dataclasses import dataclass +from typing import Protocol + +from ..config import Config +from ..store import get_store + +log = logging.getLogger("recall.answer") + + +# ---- LLM provider interface ------------------------------------------ +class LLMProvider(Protocol): + """Sync chat completion. Returns the assistant's text reply.""" + + def complete(self, *, system: str, user: str, max_tokens: int = 800) -> str: ... + + +@dataclass +class AzureOpenAIProvider: + """Azure OpenAI chat-completion provider (gpt-4o-mini default deployment). + + Env required at construction time: + AZURE_OPENAI_ENDPOINT — e.g. https://my-aoai.openai.azure.com + AZURE_OPENAI_API_KEY — key, OR omit and DefaultAzureCredential is used + AZURE_OPENAI_DEPLOYMENT — deployment name (default: gpt-4o-mini) + AZURE_OPENAI_API_VERSION — default: 2024-08-01-preview + """ + + endpoint: str + deployment: str + api_version: str + api_key: str | None # None → use DefaultAzureCredential + + def complete(self, *, system: str, user: str, max_tokens: int = 800) -> str: + # Lazy import — keeps the SDK out of the unit-test path. 
def _provider_from_env() -> LLMProvider:
    """Build the default Azure OpenAI provider from environment variables.

    Raises:
        RuntimeError: when AZURE_OPENAI_ENDPOINT is unset, so the tool layer
            can return a JSON error instead of crashing.
    """
    endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "").strip()
    if not endpoint:
        raise RuntimeError(
            "answer: AZURE_OPENAI_ENDPOINT not set. "
            "Set AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT (and either "
            "AZURE_OPENAI_API_KEY or rely on DefaultAzureCredential)."
        )
    deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4o-mini")
    api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-08-01-preview")
    # Empty-string key normalizes to None -> DefaultAzureCredential path.
    key = os.environ.get("AZURE_OPENAI_API_KEY") or None
    return AzureOpenAIProvider(
        endpoint=endpoint,
        deployment=deployment,
        api_version=api_version,
        api_key=key,
    )
def _build_user_prompt(question: str, chunks: list[dict]) -> str:
    """Assemble the grounded user prompt: question, numbered chunks, closing rule.

    Each chunk dict supplies 'source' (defaults to 'unknown') and 'text'
    (stripped of surrounding whitespace).
    """
    lines: list[str] = [
        f"Question: {question}",
        "",
        "Supporting context (from this tenant's brain):",
    ]
    for idx, chunk in enumerate(chunks, start=1):
        lines.append(f"--- chunk {idx} (source: {chunk.get('source', 'unknown')}) ---")
        lines.append(chunk.get("text", "").strip())
    lines.append("")
    # Closing instruction mirrors the system prompt's honesty rule.
    lines.append(
        "Now answer the colleague's question in your own voice, as prose, "
        "without inline [n] citation markers. If the context above does not "
        "support a confident answer, say \"I don't have that in your brain.\""
    )
    return "\n".join(lines)
+ """ + cfg = config or _default_config() + store = get_store() + if not question or not question.strip(): + return json.dumps({"error": "question is required"}) + n = min(max(int(n), 1), 20) + if store.count() == 0: + return json.dumps({"error": "memory is empty — run reindex first"}) + where = {"type": type} if type != "all" else None + n = min(n, store.count()) + raw = store.query(query_texts=[question], n_results=n, where=where) + docs = (raw.get("documents") or [[]])[0] + metas = (raw.get("metadatas") or [[]])[0] + if not docs: + return json.dumps({"error": "no matches found"}) + chunks = [ + { + "n": i + 1, + "source": (metas[i] or {}).get("source", "unknown"), + "text": docs[i], + } + for i in range(len(docs)) + ] + + global _provider + if _provider is None: + try: + _provider = _provider_from_env() + except Exception as e: + return json.dumps({"error": str(e)}) + + try: + reply = _provider.complete( + system=_SYSTEM_PROMPT, + user=_build_user_prompt(question, chunks), + max_tokens=max_tokens, + ) + except Exception as e: + log.exception("LLM provider failed") + return json.dumps({"error": f"llm provider failed: {e}"}) + + sources = [ + {"n": c["n"], "source": c["source"], "snippet": c["text"][:240]} + for c in chunks + ] + model_name = getattr(_provider, "deployment", _provider.__class__.__name__) + return json.dumps( + { + "answer": reply.strip(), + "sources": sources, + "model": model_name, + "chunks_used": len(chunks), + } + ) diff --git a/src/recall/tools/backfill.py b/src/recall/tools/backfill.py new file mode 100644 index 0000000..c4b0b4a --- /dev/null +++ b/src/recall/tools/backfill.py @@ -0,0 +1,74 @@ +# @wbx-modified copilot-b1c4 | 2026-04-27 19:30 MTN | v1.0 | one-shot migration: backfill indexed_at_epoch | prev: NEW +"""backfill_epoch — add indexed_at_epoch (numeric) to chunks that only have +indexed_at (ISO string). Idempotent. Paginate by id-order. 
+ +Required because ChromaDB rejects $gte on string fields, so since= queries +in recall_filtered need numeric epoch on every chunk. +""" + +from __future__ import annotations + +import time +from datetime import datetime + +from ..store import get_store + + +def backfill_epoch(start: int = 0, batch_size: int = 2000) -> str: + """Process one page. Call repeatedly with start += batch_size until 'done'. + + Returns: 'progress: scanned=X fixed=Y skipped=Z next=N total=T' or + 'done: scanned=X fixed=Y skipped=Z failed=F total=T'. + + NOT a delete operation. Uses collection.update(ids, metadatas) to add + the new field; total chunk count is invariant across the entire run. + """ + store = get_store() + # Direct chroma helpers added in store.py: get_all_ids / get_by_ids / + # update_metadatas. Tools relying on these accept a ChromaStore-like + # object; tests can supply a fake exposing the same methods. + if not hasattr(store, "get_all_ids"): + return "error: store backend does not support backfill (no get_all_ids)" + all_ids = store.get_all_ids() + total = len(all_ids) + if start >= total: + return f"done: scanned={total} total={total}" + page_ids = all_ids[start : start + batch_size] + page = store.get_by_ids(page_ids, include=["metadatas"]) + update_ids: list[str] = [] + update_metas: list[dict] = [] + skipped = 0 + failed = 0 + for cid, meta in zip(page["ids"], page["metadatas"], strict=False): + if not meta: + skipped += 1 + continue + ep = meta.get("indexed_at_epoch") + if isinstance(ep, (int, float)) and ep > 0: + skipped += 1 + continue + iso = meta.get("indexed_at") + try: + if iso: + ts = datetime.fromisoformat(str(iso).replace("Z", "+00:00")).timestamp() + else: + ts = time.time() + new_meta = dict(meta) + new_meta["indexed_at_epoch"] = float(ts) + update_ids.append(cid) + update_metas.append(new_meta) + except Exception: + failed += 1 + continue + if update_ids: + store.update_metadatas(update_ids, update_metas) + next_start = start + batch_size + if 
next_start >= total: + return ( + f"done: scanned={start + len(page_ids)} fixed={len(update_ids)} " + f"skipped={skipped} failed={failed} total={total}" + ) + return ( + f"progress: scanned={start + len(page_ids)} fixed={len(update_ids)} " + f"skipped={skipped} failed={failed} next={next_start} total={total}" + ) diff --git a/src/recall/tools/checkpoint.py b/src/recall/tools/checkpoint.py index bf3288a..5d8c8db 100644 --- a/src/recall/tools/checkpoint.py +++ b/src/recall/tools/checkpoint.py @@ -4,6 +4,7 @@ from __future__ import annotations import hashlib +import time from datetime import datetime from ..artifacts import persist_artifact @@ -62,6 +63,7 @@ def checkpoint( "source": f"checkpoint/{session}", "chunk_index": 0, "indexed_at": ts, + "indexed_at_epoch": time.time(), "type": "checkpoint", "domain": domain or "general", "session": session, diff --git a/src/recall/tools/recall.py b/src/recall/tools/recall.py index 1221a4a..c6bc3ac 100644 --- a/src/recall/tools/recall.py +++ b/src/recall/tools/recall.py @@ -1,5 +1,10 @@ -# @wbx-modified copilot-c4a1·MTN | 2026-04-23 | recall tool — semantic search across indexed memory | prev: NEW -"""recall — semantic search across all indexed memory.""" +# @wbx-modified copilot-b1c4 | 2026-04-27 19:30 MTN | v1.2 | structured envelope (rank/distance/type/source/domain/confidence/text) | prev: copilot-c4a1@2026-04-23 +"""recall — semantic search across all indexed memory. + +Public surface: + - recall(query, n, type) -> str (back-compat string for legacy callers) + - _recall_structured(query, n, type) -> dict {result, results} (HTTP envelope) +""" from __future__ import annotations @@ -18,6 +23,76 @@ } +def _recall_rows(query: str, n: int, type: str) -> list[dict]: + """Run query, return structured rows. Pinned schema: + rank, distance, type, source, domain, confidence, text. 
def _format_recall_string(rows: list[dict]) -> str:
    """Render structured rows in the legacy marker-line grammar.

    The marker stays INTENTIONALLY MINIMAL: `distance | type` only. Domain
    and confidence live in the structured `results[]` array; gateway parsers
    expect `type` to be the last field before the closing `---`.
    """
    if not rows:
        return "No results found."
    rendered: list[str] = []
    for row in rows:
        marker_bits = [f"Result {row['rank']}"]
        if row["distance"] is not None:
            marker_bits.append(f"distance: {row['distance']:.3f}")
        marker_bits.append(f"type: {row['type']}")
        marker = " | ".join(marker_bits)
        rendered.append(f"--- {marker} ---\nSource: {row['source']}\n{row['text']}\n")
    return "\n".join(rendered)
def recall(query: str, n: int = 5, type: str = "all", config: Config | None = None) -> str:
    """Semantic search across all indexed memory (legacy string surface).

    Args:
        query: Natural-language search text.
        n: Result count (clamped to 1..20 downstream).
        type: Filter — all, reasoning, anti_pattern, reflection, observation,
            document, checkpoint.
        config: Runtime config. If None, uses module default.

    Thin back-compat shim: the structured envelope is built by
    _recall_structured; legacy callers get only its rendered string.
    """
    return _recall_structured(query, n=n, type=type, config=config)["result"]
Returns the SAME envelope shape as recall: + {result, results}. HTTP layer wraps with {tool, by}. + +v1.2 additive opts (no breaking change): + - diversity=False (default OFF — opt-in via gateway). + - compute_confidence=False (default OFF — opt-in via gateway). + When ON, envelope adds {low_confidence, families} so gateway can + skip its own pass (brain wins as single source of truth). +""" + +from __future__ import annotations + +from datetime import datetime, timedelta + +from ..config import Config +from ..state import staleness_check +from ..store import get_store +from .recall import _format_recall_string + +_VALID_TYPES = { + "all", + "reasoning", + "anti_pattern", + "reflection", + "observation", + "document", + "checkpoint", +} + + +def _parse_since(since: str) -> tuple[float | None, str | None]: + """Parse since= into (epoch_threshold, iso_threshold). + + Returns (None, None) for empty/unparseable. + Accepts: '7d', '24h', '30m', or an ISO datetime string. + """ + if not since: + return None, None + s = since.strip().lower() + if len(s) >= 2 and s[-1] in ("d", "h", "m") and s[:-1].isdigit(): + n = int(s[:-1]) + unit = s[-1] + delta = {"d": timedelta(days=n), "h": timedelta(hours=n), "m": timedelta(minutes=n)}[unit] + threshold_dt = datetime.now() - delta + return threshold_dt.timestamp(), threshold_dt.isoformat() + try: + dt = datetime.fromisoformat(since) + return dt.timestamp(), dt.isoformat() + except (ValueError, TypeError): + return None, None + + +def _build_filter( + type: str, + domain: str, + session: str, + source_prefix: str, + since_epoch: float | None = None, + since_iso: str | None = None, +) -> dict | None: + """Build a ChromaDB where-filter from structured params. None = no filter. + + For time windows: epoch-only ($gte numeric). Chunks indexed before the + dual-write landed lack indexed_at_epoch and won't match a since= window + until backfill_epoch runs — that's the correct semantic (ChromaDB + rejects $gte on string fields). 
+ """ + clauses: list[dict] = [] + if type and type != "all": + clauses.append({"type": type}) + if domain: + clauses.append({"domain": domain}) + if session: + bare = session.replace("copilot-", "") + clauses.append({"$or": [{"session": bare}, {"session": f"copilot-{bare}"}]}) + # source_prefix handled post-query (no native prefix op in ChromaDB). + if since_epoch is not None: + clauses.append({"indexed_at_epoch": {"$gte": since_epoch}}) + if not clauses: + return None + if len(clauses) == 1: + return clauses[0] + return {"$and": clauses} + + +def _source_family(source: str) -> str: + """Family key — finer cut than first path component. + + Heuristic (matches a3f7 gateway-side): + 1. If filename has 4+ dotted segments (SDK doc spam like + 'EllieMae.Encompass.Configuration.CustomField.md'), use the + first 4 dotted segments as the family — collapses one type + per namespace branch. + 2. Otherwise, use the parent directory of the file. + 3. Fallback to the source itself if no path separator. + """ + if not source: + return "" + src = source.replace("\\", "/").strip("/") + if not src: + return "" + parts = src.split("/") + leaf = parts[-1] + # Strip extension for dotted-segment counting + stem = leaf.rsplit(".", 1)[0] if "." in leaf else leaf + dotted = stem.split(".") + if len(dotted) >= 4: + return ".".join(dotted[:4]) + if len(parts) >= 2: + return parts[-2] + return parts[0] + + +def _diversify(rows: list[dict], n: int, min_families: int) -> list[dict]: + """Reorder rows so top-n covers as many distinct source families as + possible. Stable within family (preserves rank order). Returns the + full list with rebalanced top, capped at len(rows). 
+ """ + if not rows or n <= 1: + return rows + families: dict[str, list[dict]] = {} + order: list[str] = [] + for r in rows: + fam = _source_family(r.get("source", "") or "") + if fam not in families: + families[fam] = [] + order.append(fam) + families[fam].append(r) + if len(order) < min_families: + return rows # not enough diversity available; leave as-is + # Round-robin until we have n picks (or exhaust all). + picked: list[dict] = [] + while len(picked) < n and any(families[f] for f in order): + for fam in order: + if not families[fam]: + continue + picked.append(families[fam].pop(0)) + if len(picked) >= n: + break + # Tail = remaining rows preserving their original relative order. + leftover = [r for fam in order for r in families[fam]] + out = picked + leftover + for i, r in enumerate(out): + r["rank"] = i + 1 + return out + + +def _low_confidence(rows: list[dict], spread: float = 0.05, floor: float = 0.30) -> bool: + """Flag weak-retrieval signal. Two triggers (matches a3f7): + + A. Single-family clustering: n>=4 AND all rows from same family. + Most common failure mode in the IW corpus (SDK-reference spam). + B. Tight + high distance: n>=3, max-min spread<=spread, mean>floor. 
+ """ + if not rows: + return False + # Trigger A — single family with n>=4 + if len(rows) >= 4: + families = {_source_family(r.get("source", "") or "") for r in rows} + families.discard("") + if len(families) == 1: + return True + # Trigger B — tight cluster + high mean distance + dists = [r["distance"] for r in rows if r.get("distance") is not None] + if len(dists) < 3: + return False + if (max(dists) - min(dists)) > spread: + return False + return (sum(dists) / len(dists)) > floor + + +def _recall_filtered_structured( + query: str = "", + n: int = 20, + type: str = "all", + domain: str = "", + session: str = "", + source_prefix: str = "", + since: str = "", + diversity: bool = False, + min_diversity: int = 2, + compute_confidence: bool = False, + config: Config | None = None, +) -> dict: + """Structured-array variant. Same envelope as recall: {result, results}.""" + cfg = config or _default_config() + if type not in _VALID_TYPES: + msg = f"Invalid type '{type}'. Must be one of: {', '.join(sorted(_VALID_TYPES))}" + return {"result": msg, "results": [], "error": msg} + n = min(max(n, 1), 100) + store = get_store() + if store.count() == 0: + return {"result": "Memory is empty. Run 'reindex' first.", "results": []} + since_epoch, since_iso = _parse_since(since) if since else (None, None) + if since and since_epoch is None: + msg = f"Invalid since='{since}'. Use '7d'/'24h'/'30m' or ISO datetime." + return {"result": msg, "results": [], "error": msg} + where = _build_filter( + type, domain, session, source_prefix, + since_epoch=since_epoch, since_iso=since_iso, + ) + + # Over-fetch when diversifying so we have raw material to rebalance. 
+ fetch_n = min(n * 2, 100) if diversity else n + + rows: list[dict] = [] + if query: + res = store.query(query_texts=[query], n_results=fetch_n, where=where) + docs = res["documents"][0] if res.get("documents") else [] + metas = res["metadatas"][0] if res.get("metadatas") else [] + dists = res["distances"][0] if res.get("distances") else [None] * len(docs) + for i, (doc, meta, dist) in enumerate(zip(docs, metas, dists, strict=False)): + rows.append( + { + "rank": i + 1, + "distance": float(dist) if dist is not None else None, + "type": meta.get("type", "document"), + "source": meta.get("source", "unknown"), + "domain": meta.get("domain", "") or None, + "confidence": meta.get("confidence"), + "text": doc, + } + ) + else: + # Pure structural pull — no embedding. + res = store.get(where=where, limit=fetch_n) + docs = res.get("documents") or [] + metas = res.get("metadatas") or [] + for i, (doc, meta) in enumerate(zip(docs, metas, strict=False)): + rows.append( + { + "rank": i + 1, + "distance": None, + "type": meta.get("type", "document"), + "source": meta.get("source", "unknown"), + "domain": meta.get("domain", "") or None, + "confidence": meta.get("confidence"), + "text": doc, + } + ) + if source_prefix: + rows = [r for r in rows if r["source"].startswith(source_prefix)] + for i, r in enumerate(rows): + r["rank"] = i + 1 + + if diversity: + rows = _diversify(rows, n=n, min_families=max(min_diversity, 1)) + + # Trim to the caller-requested n after rerank. 
+ rows = rows[:n] + for i, r in enumerate(rows): + r["rank"] = i + 1 + + body = _format_recall_string(rows) + payload: dict = {"result": body, "results": rows} + if compute_confidence: + low_conf = _low_confidence(rows) + families = sorted({_source_family(r.get("source", "") or "") for r in rows} - {""}) + payload["low_confidence"] = low_conf + payload["families"] = families + if low_conf: + payload["result"] = ( + "[low-confidence: weak retrieval signal — single-family or tight high-distance cluster]\n" + + body + ) + payload["result"] += staleness_check(cfg.stale_minutes) + return payload + + +def recall_filtered( + query: str = "", + n: int = 20, + type: str = "all", + domain: str = "", + session: str = "", + source_prefix: str = "", + since: str = "", + diversity: bool = False, + min_diversity: int = 2, + compute_confidence: bool = False, + config: Config | None = None, +) -> str: + """Structural query over brain metadata. Filters BEFORE semantic match. + + Args: + query: Optional natural-language query. Empty = pure structural pull. + n: Max results (default 20, hard cap 100). + type: all|reasoning|anti_pattern|reflection|observation|document|checkpoint + domain: Exact domain match (e.g. 'icewhisperer-gateway'). + session: Hex agent id, with or without 'copilot-' prefix. + source_prefix: Match if metadata.source startswith this string. + since: Time window — '7d', '24h', '30m', or ISO datetime. + diversity: If True, over-fetch and rebalance results across distinct + source families. Default False (back-compat / opt-in by gateway). + min_diversity: Minimum distinct families required to trigger rerank. + compute_confidence: If True, envelope adds {low_confidence, families}. + Default False (back-compat / opt-in). Gateway can use this to + skip its own pass — brain wins as single source of truth. 
+ """ + payload = _recall_filtered_structured( + query=query, n=n, type=type, domain=domain, session=session, + source_prefix=source_prefix, since=since, + diversity=diversity, min_diversity=min_diversity, + compute_confidence=compute_confidence, config=config, + ) + return payload["result"] + + +_cfg: Config | None = None + + +def _default_config() -> Config: + global _cfg + if _cfg is None: + _cfg = Config() + return _cfg + + +def set_config(config: Config) -> None: + global _cfg + _cfg = config diff --git a/src/recall/tools/reflect.py b/src/recall/tools/reflect.py index 0990f88..100dfb6 100644 --- a/src/recall/tools/reflect.py +++ b/src/recall/tools/reflect.py @@ -7,6 +7,7 @@ from __future__ import annotations import hashlib +import time from datetime import datetime from ..artifacts import persist_artifact @@ -73,6 +74,7 @@ def reflect( "source": f"reasoning/{domain}", "chunk_index": 0, "indexed_at": datetime.now().isoformat(), + "indexed_at_epoch": time.time(), "type": "reasoning", "domain": domain, "result": result.split()[0] if result else "UNKNOWN", @@ -128,6 +130,7 @@ def anti_pattern( "source": f"anti-pattern/{domain}", "chunk_index": 0, "indexed_at": datetime.now().isoformat(), + "indexed_at_epoch": time.time(), "type": "anti_pattern", "domain": domain, "session": session, @@ -195,6 +198,7 @@ def session_close( "source": f"reflection/{session_id}", "chunk_index": 0, "indexed_at": datetime.now().isoformat(), + "indexed_at_epoch": time.time(), "type": "reflection", "domain": "session", "session": session_id, diff --git a/src/recall/tools/remember.py b/src/recall/tools/remember.py index 80bdd4a..9811834 100644 --- a/src/recall/tools/remember.py +++ b/src/recall/tools/remember.py @@ -4,6 +4,7 @@ from __future__ import annotations import hashlib +import time from datetime import datetime from ..artifacts import persist_artifact @@ -44,6 +45,7 @@ def remember(content: str, source: str = "agent-observation", tags: str = "") -> "source": source, "chunk_index": 
0, "indexed_at": datetime.now().isoformat(), + "indexed_at_epoch": time.time(), "type": "observation", } if tags: diff --git a/src/recall/transport/http.py b/src/recall/transport/http.py index bb953a4..a8224f0 100644 --- a/src/recall/transport/http.py +++ b/src/recall/transport/http.py @@ -1,4 +1,4 @@ -# @wbx-modified copilot-c4a1·MTN | 2026-04-23 | plain HTTP transport | prev: NEW +# @wbx-modified copilot-b1c4 | 2026-04-27 19:30 MTN | v1.1 | structured envelope for recall + recall_filtered (port from server-azure v29.4) | prev: copilot-c4a1@2026-04-23 """Plain HTTP transport — POST /tool/{name} with JSON body. Used by browser-side UIs and any non-MCP client. Auth is enforced by the @@ -14,10 +14,20 @@ from ..store import is_ready from ..tools import TOOL_REGISTRY +from ..tools.recall import _recall_structured +from ..tools.recall_filtered import _recall_filtered_structured log = logging.getLogger("recall.transport.http") +# Tools that return a structured envelope {result, results} instead of a +# bare string. HTTP layer wraps with {tool, by} but does NOT cast result. 
+_STRUCTURED_TOOLS = { + "recall": _recall_structured, + "recall_filtered": _recall_filtered_structured, +} + + async def health_handler(request: Request) -> JSONResponse: cfg = request.app.state.config if not is_ready(): @@ -73,4 +83,12 @@ async def tool_handler(request: Request) -> JSONResponse: except Exception as e: log.exception("Tool %s failed", name) return JSONResponse({"error": f"tool failed: {e}"}, status_code=500) + structured = _STRUCTURED_TOOLS.get(name) + if structured is not None: + try: + payload = structured(**args) + except Exception: + log.exception("Structured envelope for %s failed; falling back to string", name) + payload = {"result": str(result), "results": []} + return JSONResponse({**payload, "tool": name, "by": user}) return JSONResponse({"result": str(result), "tool": name, "by": user}) diff --git a/tests/fakestore.py b/tests/fakestore.py index 78d6107..2548924 100644 --- a/tests/fakestore.py +++ b/tests/fakestore.py @@ -1,4 +1,4 @@ -# @wbx-modified copilot-c4a1·MTN | 2026-04-23 | Recall Wk1 Day 1 — in-memory Store fake for unit tests | prev: NEW +# @wbx-modified copilot-b1c4 | 2026-04-27 19:30 MTN | v1.1 | added $and/$or/$gte where ops + backfill helpers | prev: copilot-c4a1@2026-04-23 """In-memory store implementing the Store protocol — used by unit tests to avoid spinning up ChromaDB.""" @@ -14,6 +14,42 @@ class _Row: metadata: dict +def _match_clause(meta: dict, clause: dict) -> bool: + """Evaluate a where-clause against a metadata dict. + + Supports: equality {"k": v}, $and, $or, and {"$gte": n} numeric ops. 
+ """ + if "$and" in clause: + return all(_match_clause(meta, c) for c in clause["$and"]) + if "$or" in clause: + return any(_match_clause(meta, c) for c in clause["$or"]) + for k, v in clause.items(): + if isinstance(v, dict): + for op, operand in v.items(): + got = meta.get(k) + if op == "$gte": + if not (isinstance(got, (int, float)) and got >= operand): + return False + elif op == "$lte": + if not (isinstance(got, (int, float)) and got <= operand): + return False + elif op == "$gt": + if not (isinstance(got, (int, float)) and got > operand): + return False + elif op == "$lt": + if not (isinstance(got, (int, float)) and got < operand): + return False + elif op == "$ne": + if got == operand: + return False + else: + return False + else: + if meta.get(k) != v: + return False + return True + + @dataclass class FakeStore: rows: dict[str, _Row] = field(default_factory=dict) @@ -26,10 +62,9 @@ def upsert(self, ids, documents, metadatas) -> None: self.rows[i] = _Row(id=i, document=doc, metadata=dict(meta)) def query(self, query_texts, n_results, where=None): - # Naive: return rows matching `where` (if any), in insertion order. 
results = list(self.rows.values()) if where: - results = [r for r in results if all(r.metadata.get(k) == v for k, v in where.items())] + results = [r for r in results if _match_clause(r.metadata, where)] results = results[:n_results] return { "documents": [[r.document for r in results]], @@ -41,7 +76,7 @@ def query(self, query_texts, n_results, where=None): def get(self, where=None, limit=100, include=None): results = list(self.rows.values()) if where: - results = [r for r in results if all(r.metadata.get(k) == v for k, v in where.items())] + results = [r for r in results if _match_clause(r.metadata, where)] results = results[:limit] return { "ids": [r.id for r in results], @@ -53,6 +88,22 @@ def delete(self, ids) -> None: for i in ids: self.rows.pop(i, None) + def get_all_ids(self) -> list[str]: + return list(self.rows.keys()) + + def get_by_ids(self, ids, include=None): + rows = [self.rows[i] for i in ids if i in self.rows] + return { + "ids": [r.id for r in rows], + "documents": [r.document for r in rows], + "metadatas": [r.metadata for r in rows], + } + + def update_metadatas(self, ids, metadatas) -> None: + for i, meta in zip(ids, metadatas, strict=False): + if i in self.rows: + self.rows[i].metadata = dict(meta) + def install(monkeypatch=None) -> FakeStore: """Install a FakeStore as the module-level singleton in recall.store. diff --git a/tests/test_no_oss_to_enterprise_imports.py b/tests/test_no_oss_to_enterprise_imports.py new file mode 100644 index 0000000..e275f48 --- /dev/null +++ b/tests/test_no_oss_to_enterprise_imports.py @@ -0,0 +1,28 @@ +# @wbx-modified copilot-a3f7·MTN | 2026-04-24 | enforce OSS-never-imports-enterprise rule | prev: NEW +"""Guard: nothing under src/recall/ may import from enterprise/. + +This protects the OSS-core/enterprise license boundary. The OSS core is MIT; +the enterprise tree is BSL. If MIT code took a runtime dependency on BSL code +the boundary would be effectively unilateral. 
+""" +from __future__ import annotations + +import re +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +OSS_SRC = ROOT / "src" / "recall" + +IMPORT_RE = re.compile(r"^\s*(?:from|import)\s+enterprise(?:\.|\s|$)", re.MULTILINE) + + +def test_oss_does_not_import_enterprise() -> None: + offenders: list[str] = [] + for py in OSS_SRC.rglob("*.py"): + text = py.read_text(encoding="utf-8") + if IMPORT_RE.search(text): + offenders.append(str(py.relative_to(ROOT))) + assert not offenders, ( + "OSS core (MIT) imports from enterprise/ (BSL). Reverse the dependency:\n " + + "\n ".join(offenders) + ) diff --git a/tests/unit/test_answer.py b/tests/unit/test_answer.py new file mode 100644 index 0000000..16b1463 --- /dev/null +++ b/tests/unit/test_answer.py @@ -0,0 +1,124 @@ +# @wbx-modified copilot-a3f7·MTN | 2026-04-26 01:25 MTN | v0.1 | answer tool tests w/ FakeLLM (no network) | prev: original +"""Unit tests for the synthesis (answer) tool. + +The tests inject a FakeLLM via `set_provider`, so no Azure OpenAI SDK or +network access is required. They prove the tool: + - retrieves the right number of chunks, + - builds the prompt with numbered Sources, + - returns valid JSON with answer + sources + model, + - degrades cleanly (empty store, blank question, provider failure). 
+""" +from __future__ import annotations + +import json +from dataclasses import dataclass, field + +import pytest +from fakestore import install + +from recall.config import Config +from recall.tools import answer as answer_mod +from recall.tools import remember as remember_mod + + +@dataclass +class FakeLLM: + reply: str = "synthesized answer [1]" + last_system: str = "" + last_user: str = "" + last_max_tokens: int = 0 + deployment: str = "fake-llm" + raise_on_call: Exception | None = None + calls: int = 0 + + def complete(self, *, system: str, user: str, max_tokens: int = 800) -> str: + self.calls += 1 + self.last_system = system + self.last_user = user + self.last_max_tokens = max_tokens + if self.raise_on_call is not None: + raise self.raise_on_call + return self.reply + + +@pytest.fixture +def cfg(tmp_path): + c = Config() + c.store_dir = str(tmp_path / "store") + c.prebuilt_dir = str(tmp_path / "prebuilt") + c.artifacts_dir = str(tmp_path / "artifacts") + c.auto_snapshot_every = 0 + return c + + +@pytest.fixture +def store(monkeypatch, cfg): + fake = install(monkeypatch) + remember_mod.set_config(cfg) + answer_mod.set_config(cfg) + answer_mod.set_provider(None) # reset provider between tests + return fake + + +def test_answer_blank_question_errors(store): + out = json.loads(answer_mod.answer(question=" ")) + assert out.get("error") == "question is required" + + +def test_answer_empty_store_errors(store): + answer_mod.set_provider(FakeLLM()) + out = json.loads(answer_mod.answer(question="anything")) + assert "error" in out + assert "empty" in out["error"] + + +def test_answer_returns_synthesis_with_sources(store): + remember_mod.remember(content="Loan folder paths use forward slashes only.", source="kb/folders.md", tags="") + remember_mod.remember(content="The /v3 path replaced /v1 for most settings endpoints.", source="kb/api.md", tags="") + llm = FakeLLM(reply="Use forward slashes [1]. 
Settings moved to /v3 [2].") + answer_mod.set_provider(llm) + + raw = answer_mod.answer(question="how do folder paths work and where do settings live?", n=4) + out = json.loads(raw) + + assert out["answer"].startswith("Use forward slashes") + assert out["model"] == "fake-llm" + assert out["chunks_used"] >= 1 + assert isinstance(out["sources"], list) + assert all({"n", "source", "snippet"} <= s.keys() for s in out["sources"]) + # Prompt was built with a numbered context block. + assert "Supporting context" in llm.last_user + assert "chunk 1" in llm.last_user + + +def test_answer_clamps_n_to_20(store): + for i in range(5): + remember_mod.remember(content=f"chunk {i}", source=f"src{i}", tags="") + llm = FakeLLM() + answer_mod.set_provider(llm) + out = json.loads(answer_mod.answer(question="q", n=999)) + # only 5 chunks exist; tool must cap n by store size, not crash. + assert out["chunks_used"] == 5 + + +def test_answer_propagates_provider_failure_as_json_error(store): + remember_mod.remember(content="hello", source="src", tags="") + answer_mod.set_provider(FakeLLM(raise_on_call=RuntimeError("boom"))) + out = json.loads(answer_mod.answer(question="q")) + assert "error" in out + assert "boom" in out["error"] + + +def test_answer_passes_max_tokens_to_provider(store): + remember_mod.remember(content="x", source="src", tags="") + llm = FakeLLM() + answer_mod.set_provider(llm) + answer_mod.answer(question="q", max_tokens=123) + assert llm.last_max_tokens == 123 + + +def test_answer_registered_in_tool_registry(): + from recall.tools import TOOL_REGISTRY + + assert "answer" in TOOL_REGISTRY + assert TOOL_REGISTRY["answer"] is answer_mod.answer diff --git a/tests/unit/test_imports.py b/tests/unit/test_imports.py index 9a61cd2..115cbdc 100644 --- a/tests/unit/test_imports.py +++ b/tests/unit/test_imports.py @@ -18,16 +18,21 @@ def test_top_level_imports(): assert recall.__version__ -def test_tools_registry_has_thirteen_entries(): +def 
test_tools_registry_has_fourteen_entries(): from recall.tools import TOOL_REGISTRY, WRITE_TOOLS - assert len(TOOL_REGISTRY) == 13 + # 13 originals + answer (2026-04-26) + recall_filtered + backfill_epoch (2026-04-27 b1c4). + assert len(TOOL_REGISTRY) == 16 assert "recall" in TOOL_REGISTRY + assert "recall_filtered" in TOOL_REGISTRY + assert "answer" in TOOL_REGISTRY assert "checkpoint" in TOOL_REGISTRY assert "snapshot_index" in TOOL_REGISTRY + assert "backfill_epoch" in TOOL_REGISTRY # forget is a write tool too — but per delete=archive guardrail it # archives, doesn't delete. Tracked separately below. assert "snapshot_index" in WRITE_TOOLS + assert "backfill_epoch" in WRITE_TOOLS def test_config_from_env_requires_api_key(monkeypatch): diff --git a/tests/unit/test_recall_filtered.py b/tests/unit/test_recall_filtered.py new file mode 100644 index 0000000..7455d2f --- /dev/null +++ b/tests/unit/test_recall_filtered.py @@ -0,0 +1,357 @@ +# @wbx-modified copilot-b1c4 | 2026-04-27 19:30 MTN | v1.0 | tests for recall_filtered + backfill_epoch | prev: NEW +"""Tests for recall_filtered + backfill_epoch.""" + +from __future__ import annotations + +import time +from datetime import datetime, timedelta + +import pytest + +from tests.fakestore import install + + +def _seed(fake, epoch: float | None = None, **meta): + """Add a row with given metadata.""" + base = { + "source": "test", + "chunk_index": 0, + "indexed_at": datetime.now().isoformat(), + "type": "observation", + } + if epoch is not None: + base["indexed_at_epoch"] = epoch + base.update(meta) + rid = f"id-{len(fake.rows) + 1}" + fake.upsert([rid], [f"doc {rid}"], [base]) + return rid + + +def test_parse_since_relative(): + from recall.tools.recall_filtered import _parse_since + + ep, iso = _parse_since("7d") + assert ep is not None and iso is not None + assert ep < time.time() + + ep24, _ = _parse_since("24h") + assert ep24 is not None + assert ep24 > _parse_since("7d")[0] + + +def test_parse_since_invalid(): + from 
recall.tools.recall_filtered import _parse_since + + assert _parse_since("garbage") == (None, None) + assert _parse_since("") == (None, None) + + +def test_build_filter_combinations(): + from recall.tools.recall_filtered import _build_filter + + assert _build_filter("all", "", "", "", None, None) is None + assert _build_filter("anti_pattern", "", "", "", None, None) == {"type": "anti_pattern"} + + f = _build_filter("anti_pattern", "icewhisperer", "", "", None, None) + assert f == {"$and": [{"type": "anti_pattern"}, {"domain": "icewhisperer"}]} + + f = _build_filter("all", "", "a3f7", "", None, None) + assert "$or" in f + assert {"session": "a3f7"} in f["$or"] + assert {"session": "copilot-a3f7"} in f["$or"] + + f = _build_filter("all", "", "", "", 1000.0, "x") + assert f == {"indexed_at_epoch": {"$gte": 1000.0}} + + +def test_recall_filtered_empty_store(monkeypatch): + install(monkeypatch) + from recall.tools.recall_filtered import _recall_filtered_structured + + payload = _recall_filtered_structured(query="x") + assert payload["results"] == [] + assert "empty" in payload["result"].lower() + + +def test_recall_filtered_by_type(monkeypatch): + fake = install(monkeypatch) + _seed(fake, type="anti_pattern", domain="d1", source="anti-pattern/d1") + _seed(fake, type="reflection", domain="d1", source="reflection/d1") + _seed(fake, type="observation") + + from recall.tools.recall_filtered import _recall_filtered_structured + + payload = _recall_filtered_structured(type="anti_pattern") + assert len(payload["results"]) == 1 + assert payload["results"][0]["type"] == "anti_pattern" + + +def test_recall_filtered_since_epoch_only(monkeypatch): + fake = install(monkeypatch) + now = time.time() + _seed(fake, epoch=now - 86400 * 10, source="old") # 10 days ago + _seed(fake, epoch=now - 3600, source="recent") # 1 hr ago + _seed(fake, epoch=None, source="legacy") # no epoch -- excluded by since= + + from recall.tools.recall_filtered import _recall_filtered_structured + + payload = 
_recall_filtered_structured(since="7d") + sources = {r["source"] for r in payload["results"]} + assert "recent" in sources + assert "old" not in sources + assert "legacy" not in sources # missing epoch -> excluded + + +def test_recall_filtered_session_normalization(monkeypatch): + fake = install(monkeypatch) + _seed(fake, session="a3f7", source="bare-session") + _seed(fake, session="copilot-a3f7", source="prefixed-session") + _seed(fake, session="b1c4", source="other") + + from recall.tools.recall_filtered import _recall_filtered_structured + + payload = _recall_filtered_structured(session="a3f7") + sources = {r["source"] for r in payload["results"]} + assert sources == {"bare-session", "prefixed-session"} + + +def test_recall_filtered_source_prefix(monkeypatch): + fake = install(monkeypatch) + _seed(fake, source="checkpoint/a3f7") + _seed(fake, source="checkpoint/b1c4") + _seed(fake, source="reflection/a3f7") + + from recall.tools.recall_filtered import _recall_filtered_structured + + payload = _recall_filtered_structured(source_prefix="checkpoint/") + sources = {r["source"] for r in payload["results"]} + assert sources == {"checkpoint/a3f7", "checkpoint/b1c4"} + + +def test_recall_filtered_envelope_shape(monkeypatch): + fake = install(monkeypatch) + _seed(fake, type="observation") + + from recall.tools.recall_filtered import _recall_filtered_structured + + payload = _recall_filtered_structured(type="observation") + assert "result" in payload + assert "results" in payload + assert isinstance(payload["results"], list) + if payload["results"]: + row = payload["results"][0] + for k in ("rank", "distance", "type", "source", "domain", "confidence", "text"): + assert k in row + + +def test_recall_filtered_invalid_type(monkeypatch): + install(monkeypatch) + from recall.tools.recall_filtered import _recall_filtered_structured + + payload = _recall_filtered_structured(type="bogus") + assert "error" in payload + assert payload["results"] == [] + + +def 
def test_backfill_epoch_idempotent(monkeypatch):
    """backfill_epoch walks the store in batches, adds indexed_at_epoch, and is safe to re-run."""
    fake = install(monkeypatch)
    # Seed three rows whose metadata has an ISO indexed_at but no epoch field yet.
    iso = (datetime.now() - timedelta(hours=2)).isoformat()
    fake.upsert(
        ["a", "b", "c"],
        ["doc a", "doc b", "doc c"],
        [
            {"source": "x", "chunk_index": 0, "indexed_at": iso, "type": "observation"},
            {"source": "y", "chunk_index": 0, "indexed_at": iso, "type": "observation"},
            {"source": "z", "chunk_index": 0, "indexed_at": iso, "type": "observation"},
        ],
    )

    from recall.tools.backfill import backfill_epoch

    # First batch of 2: fixes two rows and reports the resume cursor.
    r1 = backfill_epoch(start=0, batch_size=2)
    assert "fixed=2" in r1
    assert "next=2" in r1

    # Resuming from the cursor finishes the remaining row.
    r2 = backfill_epoch(start=2, batch_size=2)
    assert "done" in r2
    assert "fixed=1" in r2

    # Idempotent re-run: nothing left to fix.
    r3 = backfill_epoch(start=0, batch_size=10)
    assert "done" in r3
    assert "fixed=0" in r3
    assert "skipped=3" in r3

    # Verify all rows now have indexed_at_epoch.
    for r in fake.rows.values():
        assert isinstance(r.metadata.get("indexed_at_epoch"), float)


def test_backfill_no_delete(monkeypatch):
    """Critical: backfill_epoch is delete-free (per delete=archive guardrail)."""
    fake = install(monkeypatch)
    iso = datetime.now().isoformat()
    for i in range(5):
        fake.upsert(
            [f"id{i}"],
            [f"doc{i}"],
            [{"source": "x", "chunk_index": 0, "indexed_at": iso, "type": "observation"}],
        )
    before = fake.count()

    from recall.tools.backfill import backfill_epoch

    # Row count must be unchanged after a full backfill pass.
    backfill_epoch(start=0, batch_size=10)
    assert fake.count() == before


def test_source_family():
    """_source_family groups source paths into a coarse 'family' key for diversity logic."""
    from recall.tools.recall_filtered import _source_family

    # 4+ dotted segments → first 4 segments (SDK doc spam collapse)
    assert _source_family("EllieMae.Encompass.Configuration.CustomField.md") == "EllieMae.Encompass.Configuration.CustomField"
    assert _source_family("path/to/EllieMae.Encompass.BusinessRules.Rule.Foo.md") == "EllieMae.Encompass.BusinessRules.Rule"
    # <4 dotted → parent dir
    assert _source_family("sdk-reference/Foo/Bar.md") == "Foo"
    # Backslash separators are handled the same as forward slashes.
    assert _source_family("checkpoint\\a3f7\\note.md") == "a3f7"
    assert _source_family("/data/repo/encompass/forum/topic.md") == "forum"
    assert _source_family("admin-settings/04-fields.md") == "admin-settings"
    # Degenerate inputs: bare names map to themselves, empty-ish paths to "".
    assert _source_family("standalone") == "standalone"
    assert _source_family("") == ""
    assert _source_family("/") == ""


def test_low_confidence_signal():
    """_low_confidence flags result sets that look homogeneous or uniformly weak."""
    from recall.tools.recall_filtered import _low_confidence

    # Trigger A — single family with n>=4
    rows_single_fam = [
        {"distance": 0.10, "source": "sdk/Foo/A.md"},
        {"distance": 0.15, "source": "sdk/Foo/B.md"},
        {"distance": 0.20, "source": "sdk/Foo/C.md"},
        {"distance": 0.25, "source": "sdk/Foo/D.md"},
    ]
    assert _low_confidence(rows_single_fam) is True

    # Multi-family low-distance — not low confidence
    rows_multi = [
        {"distance": 0.10, "source": "sdk/Foo/A.md"},
        {"distance": 0.11, "source": "forum/topic.md"},
        {"distance": 0.12, "source": "admin/settings.md"},
        {"distance": 0.13, "source": "knowledge/x.md"},
    ]
    assert _low_confidence(rows_multi) is False

    # Trigger B — tight cluster + high mean (multi-family but all weak)
    rows_tight_high = [
        {"distance": 0.31, "source": "a/x.md"},
        {"distance": 0.32, "source": "b/y.md"},
        {"distance": 0.33, "source": "c/z.md"},
    ]
    assert _low_confidence(rows_tight_high) is True

    # Tight but low mean — strong signal, not low confidence
    rows_tight_low = [
        {"distance": 0.10, "source": "a/x.md"},
        {"distance": 0.11, "source": "b/y.md"},
        {"distance": 0.12, "source": "c/z.md"},
    ]
    assert _low_confidence(rows_tight_low) is False

    # Wide spread — strong differentiation, not low confidence
    rows_wide = [
        {"distance": 0.10, "source": "a/x.md"},
        {"distance": 0.25, "source": "b/y.md"},
        {"distance": 0.40, "source": "c/z.md"},
    ]
    assert _low_confidence(rows_wide) is False

    # Too few rows
    assert _low_confidence([{"distance": 0.50, "source": "a/x.md"}]) is False

    # No distances + multi-family + n<4 — not low confidence
    assert _low_confidence(
        [{"distance": None, "source": "a/x.md"}, {"distance": None, "source": "b/y.md"}]
    ) is False


def test_diversify_rebalances_families():
    """_diversify round-robins across source families so one family can't fill the top-n."""
    from recall.tools.recall_filtered import _diversify

    # 5 from family A, 2 from family B; without diversity, top-3 = all A.
    rows = [
        {"rank": 1, "source": "A/1", "distance": 0.31},
        {"rank": 2, "source": "A/2", "distance": 0.32},
        {"rank": 3, "source": "A/3", "distance": 0.33},
        {"rank": 4, "source": "A/4", "distance": 0.34},
        {"rank": 5, "source": "B/1", "distance": 0.35},
        {"rank": 6, "source": "A/5", "distance": 0.36},
        {"rank": 7, "source": "B/2", "distance": 0.37},
    ]
    out = _diversify(rows, n=3, min_families=2)
    families = {r["source"].split("/")[0] for r in out[:3]}
    assert "B" in families  # B got promoted into top-3
    assert out[0]["source"] == "A/1"  # first pick still preserves rank 1
    assert out[1]["source"] == "B/1"  # round-robin picks B next
    # Ranks rewritten
    assert out[0]["rank"] == 1
    assert out[1]["rank"] == 2


def test_diversify_skips_when_homogeneous():
    """With fewer distinct families than min_families, _diversify leaves order untouched."""
    from recall.tools.recall_filtered import _diversify

    rows = [
        {"rank": 1, "source": "A/1", "distance": 0.31},
        {"rank": 2, "source": "A/2", "distance": 0.32},
    ]
    out = _diversify(rows, n=2, min_families=2)
    # Only one family -> returns as-is
    assert out[0]["source"] == "A/1"
    assert out[1]["source"] == "A/2"


def test_recall_filtered_diversity_param(monkeypatch):
    """diversity=True with compute_confidence=True populates the confidence envelope fields."""
    fake = install(monkeypatch)
    # Four docs from one SDK family plus two forum docs.
    for i in range(4):
        _seed(fake, source=f"sdk-reference/Foo/Bar{i}.md", type="document")
    _seed(fake, source="forum/topic_5140.md", type="document")
    _seed(fake, source="forum/topic_9001.md", type="document")

    from recall.tools.recall_filtered import _recall_filtered_structured

    # diversity=True + compute_confidence=True → both fields populated
    payload = _recall_filtered_structured(
        n=3, diversity=True, min_diversity=2, compute_confidence=True
    )
    assert "low_confidence" in payload
    assert "families" in payload
    assert isinstance(payload["families"], list)


def test_recall_filtered_compute_confidence_default_off(monkeypatch):
    """Default behavior (compute_confidence=False) omits low_confidence/families."""
    fake = install(monkeypatch)
    _seed(fake, type="observation")

    from recall.tools.recall_filtered import _recall_filtered_structured

    payload = _recall_filtered_structured(type="observation")
    assert "low_confidence" not in payload
    assert "families" not in payload
    # Envelope still has core fields
    assert "result" in payload
    assert "results" in payload


def test_recall_filtered_compute_confidence_opt_in(monkeypatch):
    """compute_confidence=True adds low_confidence (bool) and families (list) to the payload."""
    fake = install(monkeypatch)
    _seed(fake, type="observation", source="forum/x.md")

    from recall.tools.recall_filtered import _recall_filtered_structured

    payload = _recall_filtered_structured(type="observation", compute_confidence=True)
    assert "low_confidence" in payload
    assert "families" in payload
    assert isinstance(payload["low_confidence"], bool)
    assert isinstance(payload["families"], list)
From 52584306e31da5ec9395aef2f21bb2d2b1790153 Mon Sep 17 00:00:00 2001
From: Steve Paltridge
Date: Mon, 27 Apr 2026 23:11:47 -0600
Subject: [PATCH 3/4] =?UTF-8?q?fix:=20ruff=20lint=20=E2=80=94=20drop=20unu?=
 =?UTF-8?q?sed=20cfg/field/pytest?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/unit/test_answer.py | 2 +-
 tests/unit/test_recall_filtered.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/unit/test_answer.py b/tests/unit/test_answer.py
index 16b1463..a1d1ba3 100644
--- a/tests/unit/test_answer.py
+++ b/tests/unit/test_answer.py
@@ -11,7 +11,7 @@ from __future__ import annotations
 import json
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 import pytest
 from fakestore import install
diff --git a/tests/unit/test_recall_filtered.py b/tests/unit/test_recall_filtered.py
index
7455d2f..3252900 100644 --- a/tests/unit/test_recall_filtered.py +++ b/tests/unit/test_recall_filtered.py @@ -6,8 +6,6 @@ import time from datetime import datetime, timedelta -import pytest - from tests.fakestore import install From 0731259272e11c3e956cf87d75e0278d4341f65e Mon Sep 17 00:00:00 2001 From: Steve Paltridge Date: Mon, 27 Apr 2026 23:12:47 -0600 Subject: [PATCH 4/4] =?UTF-8?q?fix:=20drop=20unused=20cfg=20binding=20in?= =?UTF-8?q?=20answer()=20=E2=80=94=20preserve=20=5Fdefault=5Fconfig()=20si?= =?UTF-8?q?de=20effect?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/recall/tools/answer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/recall/tools/answer.py b/src/recall/tools/answer.py index 56ff472..6d74602 100644 --- a/src/recall/tools/answer.py +++ b/src/recall/tools/answer.py @@ -207,7 +207,10 @@ def answer( On error returns a JSON string with an "error" key (HTTP transport still returns 200 — the caller inspects the JSON). """ - cfg = config or _default_config() + if config is not None: + set_config(config) + else: + _default_config() store = get_store() if not question or not question.strip(): return json.dumps({"error": "question is required"})