diff --git a/CHANGELOG.md b/CHANGELOG.md index 089df37..4439f0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ Tagged releases are published to npm from GitHub Actions when a **GitHub Release - Vitest **global** coverage thresholds in `vitest.config.ts` (lines 73%, statements 72%, branches 58%, functions 76% — measured baseline minus slack); `npm run test:coverage` exits non-zero when any bucket regresses. - `@vitest/coverage-v8` devDependency for coverage reports (`lcov`, `json-summary`, HTML). - `docs/` reference set (TOOLS, CONFIGURATION, SECURITY, CONTRIBUTING, CI_CD, FAQ, MIGRATION, RELEASING) and worked examples `examples/suggest-flow-demo.ts`, `examples/guided-query-demo.ts`, `examples/library-embedding-demo.ts`. +- `teardownServer()` export to reset process-global MCP state (suggest-flow gate, namespaces cache, URL generator registry, active config, shared `PineconeClient`) so `setupServer()` can run again in the same Node process (tests, re-embedding). +- Namespace trimming for the suggest-flow gate and gated tools (`normalizeNamespace`); use the same trimmed `namespace` for `suggest_query_params` and downstream `query` / `count` / `query_documents`. +- Successful `query` / `query_documents` / `guided_query` payloads may include `degraded`, `degradation_reason`, and `hybrid_leg_failed` when rerank or a hybrid leg fails but the tool still returns hits; `guided_query` `decision_trace` adds `rerank_status`. ### Changed @@ -34,7 +37,8 @@ Tagged releases are published to npm from GitHub Actions when a **GitHub Release - **Breaking (MCP):** Single hybrid `query` tool with `preset` (`fast` | `detailed` | `full`); removed separate `query_fast` / `query_detailed` tool registrations. - `resolveConfig()` throws if the Pinecone API key is missing (after trim); library callers must supply `apiKey` via overrides or set `PINECONE_API_KEY`. - `withTimeout` aborts an internal `AbortSignal` on deadline (cooperative cancellation). 
-- `PineconeClient`: shared hit-field extraction, safer merge dedup without empty `_id` collisions, metadata sampling skips zero-vector probe when dimension is unknown, `listNamespacesFromKeywordIndex` surfaces errors via `{ ok: false }`. +- `PineconeClient`: constructor reads index name, rerank model, and default top-k only from `PineconeClientConfig` (not `process.env`); shared hit-field extraction, safer merge dedup without empty `_id` collisions, metadata sampling skips zero-vector probe when dimension is unknown, `listNamespacesFromKeywordIndex` surfaces errors via `{ ok: false }`. +- `setupServer()` throws if called twice in one process without `teardownServer()` first; README library-embedding section documents the teardown pattern. - Metadata filter manual validation accepts primitive arrays for `$in`/`$nin` including numbers (matches Zod). - README: deployment model for process-global gate/cache/registry; adjusted feature wording vs pre-1.0 semver. - `.npmignore` no longer excludes `dist/` (still shipped via `package.json` `files`). diff --git a/README.md b/README.md index c5d846c..5b79fc2 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ When a tool fails, the MCP tool result sets **`isError: true`**. The `text` cont Success payloads are unchanged and do **not** wrap `ToolError`. Clients that still expect `{ "status": "error", "message": "..." }` must migrate to the shape above. -For successful `query` / `guided_query` payloads, **rerank/hybrid fidelity** is described in [docs/TOOLS.md](docs/TOOLS.md) (row-level `reranked`, current lack of a top-level `degraded` envelope). +For successful `query`, `query_documents`, and `guided_query` payloads, **rerank/hybrid fidelity** is described in [docs/TOOLS.md](docs/TOOLS.md#rerank-and-hybrid-degradation) (row-level `reranked`, top-level `degraded` / `degradation_reason`, and optional `hybrid_leg_failed`; `query_documents` propagates the same fields on its nested query payload when applicable). 
## Features @@ -106,9 +106,11 @@ The server uses **process-global** memory for the suggest-flow gate (`suggest_qu ### Library embedding (`setupServer`) -Treat **`setupServer()` as one logical server per Node process**: it mutates shared module singletons (suggest-flow map, namespaces cache, URL registry, config context, shared `PineconeClient` slot). A second `setupServer()` without a coordinated teardown can leave stale or mixed state for in-flight requests — **spawn a separate process** per isolated instance until an explicit lifecycle API is documented in the changelog. +Treat **`setupServer()` as one logical server per Node process**: it mutates shared module singletons (suggest-flow map, namespaces cache, URL registry, config context, shared `PineconeClient` slot). A **second** `setupServer()` in the same process **throws** unless you call **`teardownServer()`** first. -Recommended pattern: `resolveConfig` → `setPineconeClient(new PineconeClient(...))` → `await setupServer(config)` → connect one MCP transport. See [examples/library-embedding-demo.ts](examples/library-embedding-demo.ts) and [docs/TOOLS.md](docs/TOOLS.md#suggest-flow-gate). +Recommended pattern: `resolveConfig` → `setPineconeClient(new PineconeClient(...))` → `await setupServer(config)` → connect one MCP transport. For tests or re-initialization in the same process, call `teardownServer()` then `setupServer(config)` again. For isolated production tenants, prefer **one server per Node process** (or separate OS processes) rather than sharing one embedder across tenants. + +Import `setupServer` and `teardownServer` from `@will-cppa/pinecone-read-only-mcp`. See [examples/library-embedding-demo.ts](examples/library-embedding-demo.ts) and [docs/TOOLS.md](docs/TOOLS.md#suggest-flow-gate). 
### Custom URL generators diff --git a/docs/TOOLS.md b/docs/TOOLS.md index 0785e3d..7cebb83 100644 --- a/docs/TOOLS.md +++ b/docs/TOOLS.md @@ -99,7 +99,7 @@ Tools **`query`**, **`count`**, and **`query_documents`** require a prior succes | `metadata_filter` | object | no | Metadata filter | | `fields` | string[] | no | Pinecone fields to return | -**Success (`QueryResponse`):** `{ status: 'success', mode?: 'query' \| 'query_fast' \| 'query_detailed', query, namespace, metadata_filter?, result_count, results[], fields? }`. +**Success (`QueryResponse`):** `{ status: 'success', mode?: 'query' \| 'query_fast' \| 'query_detailed', query, namespace, metadata_filter?, result_count, results[], fields?, degraded?, degradation_reason?, hybrid_leg_failed? }`. Each row: `document_id`, `paper_number` (deprecated alias), `title`, `author`, `url`, `content`, `score`, `reranked`, optional `metadata`. @@ -114,9 +114,19 @@ Each row: `document_id`, `paper_number` (deprecated alias), `title`, `author`, ` } ``` -### Rerank fallback and row-level fidelity +### Rerank and hybrid degradation -When reranking is requested but the rerank API fails, the server still returns **`status: 'success'`** with rows where `reranked: false`. Treat **`reranked: false`** as lower confidence when reranking was expected (`preset` detailed/full). Structured stderr logs include the failure; there is **no** separate top-level `degraded` flag in the current JSON envelope—client UX should combine `preset`, `use_reranking`, and per-row `reranked` (see project issue backlog for envelope-level degradation). +When reranking is requested but the rerank API fails, the server still returns **`status: 'success'`** with rows where `reranked: false`, plus envelope fields: + +| Field | When set | Meaning | +| ----- | -------- | ------- | +| `degraded` | `true` | Rerank was attempted and failed (or another degradation path fired) | +| `degradation_reason` | string | Human-readable detail for MCP/LLM clients (e.g. 
`rerank_failed: timeout after 5000ms`) | +| `hybrid_leg_failed` | `'dense'` \| `'sparse'` \| omitted / `null` | Exactly one hybrid search leg failed while the other returned hits | + +Treat **`degraded: true`** as lower confidence even when `status` is `success`. Combine with per-row `reranked`, `preset`, and `use_reranking`. Structured stderr logs may include additional detail. + +`query_documents` propagates the same flags on its nested query payload when applicable. --- @@ -167,7 +177,9 @@ When reranking is requested but the rerank API fails, the server still returns * **Success:** `{ status: 'success', decision_trace, result }` where `result` is either a count payload or a `QueryResponse`-shaped query payload. -**`decision_trace` fields (non-exhaustive):** `cache_hit`, `input_namespace`, `routed_namespace`, `selected_namespace`, `ranked_namespaces`, `suggested_fields`, `suggested_tool`, `selected_tool`, `explanation`, `enrich_urls`. +**`decision_trace` fields (non-exhaustive):** `cache_hit`, `input_namespace`, `routed_namespace`, `selected_namespace`, `ranked_namespaces`, `suggested_fields`, `suggested_tool`, `selected_tool`, `explanation`, `enrich_urls`, `rerank_status` (`success` \| `skipped` \| `failed`). + +When the inner query path runs, `result` includes the same `degraded`, `degradation_reason`, and `hybrid_leg_failed` fields as `query` (see [Rerank and hybrid degradation](#rerank-and-hybrid-degradation)). **Example:** diff --git a/examples/demo-mock-pinecone-client.ts b/examples/demo-mock-pinecone-client.ts new file mode 100644 index 0000000..9592062 --- /dev/null +++ b/examples/demo-mock-pinecone-client.ts @@ -0,0 +1,88 @@ +/** + * Mock PineconeClient for examples: no network; returns canned namespaces and hits. + * Namespace `mailing` matches built-in URL generator demos in the README. 
+ */ + +import { + PineconeClient, + type CountParams, + type CountResult, + type HybridQueryResult, + type KeywordIndexNamespacesResult, + type KeywordSearchParams, + type PineconeMetadataValue, + type QueryParams, + type SearchResult, +} from '@will-cppa/pinecone-read-only-mcp'; + +export const DEMO_NAMESPACE = 'mailing'; + +const demoMetadata: Record<string, PineconeMetadataValue> = { + document_number: 'D-100', + title: 'Demo document', + chunk_text: 'This is synthetic chunk text for the week-3 examples.', +}; + +const demoHit: SearchResult = { + id: 'demo-hit-1', + content: String(demoMetadata['chunk_text']), + score: 0.95, + metadata: demoMetadata, + reranked: true, +}; + +export class DemoMockPineconeClient extends PineconeClient { + constructor() { + super({ apiKey: '00000000-0000-0000-0000-000000000000' }); + } + + override async listNamespacesWithMetadata(): Promise< + Array<{ namespace: string; recordCount: number; metadata: Record<string, string> }> + > { + return [ + { + namespace: DEMO_NAMESPACE, + recordCount: 42, + metadata: { + document_number: 'string', + title: 'string', + chunk_text: 'string', + url: 'string', + }, + }, + ]; + } + + override async listNamespacesFromKeywordIndex(): Promise<KeywordIndexNamespacesResult> { + return { + ok: true, + namespaces: [{ namespace: DEMO_NAMESPACE, recordCount: 42 }], + }; + } + + override async checkIndexes(): Promise<{ ok: boolean; errors: string[] }> { + return { ok: true, errors: [] }; + } + + override async query(params: QueryParams): Promise<HybridQueryResult> { + const reranked = params.useReranking !== false; + const row: SearchResult = { + ...demoHit, + reranked, + metadata: { ...demoMetadata }, + }; + return { + results: [row], + degraded: false, + hybrid_leg_failed: null, + }; + } + + override async count(_params: CountParams): Promise<CountResult> { + return { count: 7, truncated: false }; + } + + override async keywordSearch(_params: KeywordSearchParams): Promise<SearchResult[]> { + return []; + } +} diff --git a/examples/library-embedding-demo.ts b/examples/library-embedding-demo.ts index cfc777a..1c60edc 100644 --- 
a/examples/library-embedding-demo.ts +++ b/examples/library-embedding-demo.ts @@ -8,10 +8,8 @@ * * **Single process:** `setupServer` registers tools against process-global * singletons (suggest-flow state, namespaces cache, URL registry, active config). - * Do **not** call `setupServer` twice in one process for isolated tenants unless - * you accept shared state — prefer **one server per Node process** or external - * process isolation. (A future release may add an explicit teardown API; see - * CHANGELOG when available.) + * A second `setupServer` throws — call `teardownServer()` first to re-initialize + * (tests). For isolated tenants in production, prefer one server per Node process. */ import { diff --git a/examples/mcp-linked-transport.ts b/examples/mcp-linked-transport.ts new file mode 100644 index 0000000..91d42f8 --- /dev/null +++ b/examples/mcp-linked-transport.ts @@ -0,0 +1,53 @@ +/** + * Minimal in-memory MCP transport pair for examples (no subprocess / stdio). + * Each `send` delivers the JSON-RPC message to the peer's `onmessage` on a microtask. 
+ */ + +import type { JSONRPCMessage } from '@modelcontextprotocol/sdk/types.js'; +import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js'; + +export function createLinkedTransports(): { + clientTransport: Transport; + serverTransport: Transport; +} { + let closed = false; + const clientTransport: Transport = { + onmessage: undefined, + onclose: undefined, + onerror: undefined, + async start() {}, + async send(message: JSONRPCMessage) { + queueMicrotask(() => { + if (closed) return; + serverTransport.onmessage?.(message); + }); + }, + async close() { + if (closed) return; + closed = true; + clientTransport.onclose?.(); + serverTransport.onclose?.(); + }, + }; + + const serverTransport: Transport = { + onmessage: undefined, + onclose: undefined, + onerror: undefined, + async start() {}, + async send(message: JSONRPCMessage) { + queueMicrotask(() => { + if (closed) return; + clientTransport.onmessage?.(message); + }); + }, + async close() { + if (closed) return; + closed = true; + clientTransport.onclose?.(); + serverTransport.onclose?.(); + }, + }; + + return { clientTransport, serverTransport }; +} diff --git a/examples/tsconfig.json b/examples/tsconfig.json new file mode 100644 index 0000000..0c20ccd --- /dev/null +++ b/examples/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "strict": true, + "noEmit": true, + "skipLibCheck": true, + "baseUrl": ".", + "paths": { + "@will-cppa/pinecone-read-only-mcp": ["../dist/server.js"] + } + }, + "include": ["./**/*.ts"] +}