diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87c047de..69853d55 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,6 +54,10 @@ jobs: npm ci - name: Run Verification Test + # `tests/test_omnibus.ts` was ported to vitest as + # `tests/omnibus.test.ts` in cbc8c84; this step has been failing + # ever since. Run the whole vitest suite so omnibus + the rest of + # the per-tenant / temporal / multilingual specs are exercised. run: | cd packages/openmemory-js - npx tsx tests/test_omnibus.ts + npm test diff --git a/packages/openmemory-js/src/ai/mcp.ts b/packages/openmemory-js/src/ai/mcp.ts index 8b3ed026..a8aba265 100644 --- a/packages/openmemory-js/src/ai/mcp.ts +++ b/packages/openmemory-js/src/ai/mcp.ts @@ -81,7 +81,42 @@ const send_err = ( const uid = (val?: string | null) => (val?.trim() ? val.trim() : undefined); -export const create_mcp_srv = () => { +/** + * Resolve the effective user_id for a tool call. + * + * The HTTP MCP route runs through the same `authenticate_api_request` + * middleware as the REST routes (see src/server/index.ts), so for every + * authenticated MCP call we have a `tenant` derived from the API key. + * That tenant is the source of truth for ownership, mirroring the REST + * `require_tenant` + `reject_tenant_mismatch` model. + * + * - tenant set + no arg -> use tenant + * - tenant set + matching arg -> use tenant + * - tenant set + mismatching arg -> throw (becomes an MCP isError) + * - tenant unset (stdio transport etc) -> use the arg as supplied + * + * Stdio MCP keeps its existing behaviour — there is no HTTP request to + * carry an API key, so tenant is undefined and the tool falls back to + * whatever user_id the client passed (or `add_hsg_memory`'s "anonymous" + * default). + */ +const resolve_user_id = ( + tenant: string | undefined, + arg: string | null | undefined, +): string | undefined => { + const trimmed = uid(arg); + if (tenant) { + if (trimmed && trimmed !== tenant) { + throw new Error( + "tenant_mismatch: user_id does not match authenticated tenant; omit user_id or pass the tenant identifier", + ); + } + return tenant; + } + return trimmed; +}; + +export const create_mcp_srv = (tenant?: string) => { const srv = new McpServer( { name: "openmemory-mcp", @@ -182,7 +217,7 @@ export const create_mcp_srv = () => { user_id, project_id, }) => { - const u = uid(user_id); + const u = resolve_user_id(tenant, user_id); const proj = uid(project_id); const results: any = { type, query }; const at_date = at ? new Date(at) : new Date(); @@ -368,7 +403,7 @@ export const create_mcp_srv = () => { metadata, user_id, }) => { - const u = uid(user_id); + const u = resolve_user_id(tenant, user_id); const proj = uid(project_id); const results: any = { type }; @@ -485,7 +520,7 @@ export const create_mcp_srv = () => { metadata, user_id, }) => { - const u = uid(user_id); + const u = resolve_user_id(tenant, user_id); // Force global scope for this tool const proj = "system_global"; const results: any = { type }; @@ -571,6 +606,15 @@ export const create_mcp_srv = () => { .describe("Salience boost amount (default 0.1)"), }, async ({ id, boost }) => { + if (tenant) { + // When HTTP-bound, refuse to reinforce another tenant's memory. + const mem = await q.get_mem.get(id); + if (!mem || mem.user_id !== tenant) { + throw new Error( + `Memory ${id} not found for user ${tenant}`, + ); + } + } await reinforce_memory(id, boost); return { content: [ @@ -602,7 +646,7 @@ export const create_mcp_srv = () => { .describe("Validate project identifier"), }, async ({ id, user_id, project_id }) => { - const u = uid(user_id); + const u = resolve_user_id(tenant, user_id); const proj = uid(project_id); if (u || proj) { // Pre-check ownership if user_id/project_id provided @@ -675,7 +719,7 @@ export const create_mcp_srv = () => { .describe("Restrict results to a specific project identifier"), }, async ({ limit, sector, user_id, project_id }) => { - const u = uid(user_id); + const u = resolve_user_id(tenant, user_id); const proj = uid(project_id); let rows: mem_row[]; @@ -750,7 +794,7 @@ export const create_mcp_srv = () => { ), }, async ({ id, include_vectors, user_id }) => { - const u = uid(user_id); + const u = resolve_user_id(tenant, user_id); const mem = await q.get_mem.get(id); if (!mem) return { @@ -875,7 +919,20 @@ export const mcp = (app: any) => { // Create a fresh transport + server per request to support // multiple clients (MCP SDK 1.27 rejects re-initialization // on a single transport instance). - const srv = create_mcp_srv(); + // + // `req.tenant` is set by the global `authenticate_api_request` + // middleware (src/server/index.ts). Threading it into the + // per-request server is what scopes MCP tool calls to the + // authenticated tenant — without this, tools either wrote + // memories with user_id="anonymous" (invisible to REST + // `/memory/all` which is tenant-scoped) or read across + // every tenant. See resolve_user_id() for the per-tool + // contract. + const tenant_from_req = + typeof (req as any).tenant === "string" + ? ((req as any).tenant as string) + : undefined; + const srv = create_mcp_srv(tenant_from_req); const trans = new StreamableHTTPServerTransport({ sessionIdGenerator: undefined, enableJsonResponse: true, diff --git a/packages/openmemory-js/tests/mcp_per_tenant.test.ts b/packages/openmemory-js/tests/mcp_per_tenant.test.ts new file mode 100644 index 00000000..ccddefe9 --- /dev/null +++ b/packages/openmemory-js/tests/mcp_per_tenant.test.ts @@ -0,0 +1,201 @@ +// Force synthetic embeddings + sqlite backend BEFORE importing anything +// that loads cfg/db. vitest.config.ts already sets these via env, but keep +// this guard for standalone tsx runs. +process.env.OM_EMBEDDINGS = "synthetic"; +process.env.OM_EMBEDDING_FALLBACK = "synthetic"; +process.env.OM_METADATA_BACKEND = process.env.OM_METADATA_BACKEND || "sqlite"; +process.env.OM_VECTOR_BACKEND = process.env.OM_VECTOR_BACKEND || "sqlite"; + +import { beforeEach, describe, expect, it } from "vitest"; +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js"; +import { create_mcp_srv } from "../src/ai/mcp"; +import { run_async, q } from "../src/core/db"; + +const T_ALICE = "tenant-alice-mcp"; +const T_BOB = "tenant-bob-mcp"; + +async function cleanup() { + await run_async(`DELETE FROM memories`); + try { + await run_async(`DELETE FROM vectors`); + } catch { + /* schema variant */ + } + try { + await run_async(`DELETE FROM openmemory_vectors`); + } catch { + /* schema variant */ + } + try { + await run_async(`DELETE FROM waypoints`); + } catch { + /* schema variant */ + } +} + +async function connect_client(tenant?: string) { + const srv = create_mcp_srv(tenant); + const [client_transport, server_transport] = + InMemoryTransport.createLinkedPair(); + await srv.connect(server_transport); + const client = new Client({ name: "test-client", version: "0.0.0" }); + await client.connect(client_transport); + return { client, srv }; +} + +function parse_items(result: any): Array<{ id: string; user_id?: string }> { + // openmemory_list returns two text blocks; the second is a JSON dump. + const blocks = (result?.content ?? []) as Array<{ + type: string; + text: string; + }>; + const jsonBlock = blocks + .filter((b) => b.type === "text") + .map((b) => b.text) + .find((t) => t.trim().startsWith("{")); + if (!jsonBlock) return []; + const parsed = JSON.parse(jsonBlock); + return parsed.items ?? []; +} + +function parse_store(result: any): { id?: string; project_id?: string } { + const blocks = (result?.content ?? []) as Array<{ + type: string; + text: string; + }>; + const jsonBlock = blocks + .filter((b) => b.type === "text") + .map((b) => b.text) + .find((t) => t.trim().startsWith("{")); + if (!jsonBlock) return {}; + const parsed = JSON.parse(jsonBlock); + return { id: parsed?.hsg?.id, project_id: parsed?.project_id }; +} + +describe("MCP per-tenant scoping", () => { + beforeEach(async () => { + await cleanup(); + }); + + it("openmemory_store binds writes to the authenticated tenant", async () => { + const { client } = await connect_client(T_ALICE); + const stored = await client.callTool({ + name: "openmemory_store", + arguments: { + content: + "Nginx 502 on a fresh VM: check that the upstream service is actually running before looking at nginx config.", + tags: ["nginx", "sysadmin"], + }, + }); + const { id } = parse_store(stored); + expect(id).toBeTruthy(); + + // The DB row must carry the tenant as user_id — without this fix + // it would have been "anonymous" and invisible to REST /memory/all. + const row = await q.get_mem.get(id!); + expect(row).toBeTruthy(); + expect(row.user_id).toBe(T_ALICE); + expect(row.project_id).toBe("system_global"); + }); + + it("openmemory_list returns the tenant's own MCP-stored memories (regression)", async () => { + // Reproduces the symptom from the bug report: a memory stored via + // MCP openmemory_store must appear in MCP openmemory_list on the + // same authenticated session. + const { client } = await connect_client(T_ALICE); + + await client.callTool({ + name: "openmemory_store", + arguments: { + content: + "Nginx 502 on a fresh VM: check the upstream service is running before touching nginx config.", + tags: ["nginx"], + }, + }); + + const listed = await client.callTool({ + name: "openmemory_list", + arguments: { limit: 50 }, + }); + const items = parse_items(listed); + expect(items.length).toBeGreaterThan(0); + expect(items.every((i) => i.user_id === T_ALICE)).toBe(true); + }); + + it("openmemory_list isolates tenants from each other", async () => { + const alice = await connect_client(T_ALICE); + const bob = await connect_client(T_BOB); + + await alice.client.callTool({ + name: "openmemory_store", + arguments: { content: "Alice's private dev notes about nginx." }, + }); + await bob.client.callTool({ + name: "openmemory_store", + arguments: { content: "Bob's private dev notes about postgres." }, + }); + + const bob_list = parse_items( + await bob.client.callTool({ + name: "openmemory_list", + arguments: { limit: 50 }, + }), + ); + // Bob must not see Alice's memories. + expect(bob_list.every((i) => i.user_id === T_BOB)).toBe(true); + expect(bob_list.length).toBe(1); + + const alice_list = parse_items( + await alice.client.callTool({ + name: "openmemory_list", + arguments: { limit: 50 }, + }), + ); + expect(alice_list.every((i) => i.user_id === T_ALICE)).toBe(true); + expect(alice_list.length).toBe(1); + }); + + it("openmemory_store rejects a user_id arg that disagrees with the tenant", async () => { + const { client } = await connect_client(T_ALICE); + const result: any = await client.callTool({ + name: "openmemory_store", + arguments: { + content: "attempt to forge another tenant's identity", + user_id: T_BOB, + }, + }); + // ToolRegistry catches errors and turns them into an isError result + // with a textual "Error: ..." block. + expect(result.isError).toBe(true); + const text = (result.content ?? []) + .map((b: any) => b.text ?? "") + .join("\n"); + expect(text).toMatch(/tenant_mismatch/); + }); + + it("stdio-style server (no tenant) preserves legacy behaviour", async () => { + // No tenant bound — this is the stdio MCP shape. Stored memories + // get the "anonymous" fallback from add_hsg_memory and openmemory_list + // returns everything in the table (the pre-existing local-dev contract). + const { client } = await connect_client(undefined); + const stored = await client.callTool({ + name: "openmemory_store", + arguments: { content: "stdio-mode memory with no tenant binding" }, + }); + const { id } = parse_store(stored); + expect(id).toBeTruthy(); + + const row = await q.get_mem.get(id!); + expect(row.user_id).toBe("anonymous"); + + const items = parse_items( + await client.callTool({ + name: "openmemory_list", + arguments: { limit: 50 }, + }), + ); + expect(items.length).toBe(1); + expect(items[0].id).toBe(id); + }); +});