diff --git a/CHANGELOG.md b/CHANGELOG.md
index 089df37..4439f0e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,9 @@ Tagged releases are published to npm from GitHub Actions when a **GitHub Release
 - Vitest **global** coverage thresholds in `vitest.config.ts` (lines 73%, statements 72%, branches 58%, functions 76% — measured baseline minus slack); `npm run test:coverage` exits non-zero when any bucket regresses.
 - `@vitest/coverage-v8` devDependency for coverage reports (`lcov`, `json-summary`, HTML).
 - `docs/` reference set (TOOLS, CONFIGURATION, SECURITY, CONTRIBUTING, CI_CD, FAQ, MIGRATION, RELEASING) and worked examples `examples/suggest-flow-demo.ts`, `examples/guided-query-demo.ts`, `examples/library-embedding-demo.ts`.
+- `teardownServer()` export to reset process-global MCP state (suggest-flow gate, namespaces cache, URL generator registry, active config, shared `PineconeClient`) so `setupServer()` can run again in the same Node process (tests, re-embedding).
+- Namespace trimming for the suggest-flow gate and gated tools (`normalizeNamespace`); use the same trimmed `namespace` for `suggest_query_params` and downstream `query` / `count` / `query_documents`.
+- Successful `query` / `query_documents` / `guided_query` payloads may include `degraded`, `degradation_reason`, and `hybrid_leg_failed` when rerank or a hybrid leg fails but the tool still returns hits; `guided_query` `decision_trace` adds `rerank_status`.
 
 ### Changed
 
@@ -34,7 +37,8 @@ Tagged releases are published to npm from GitHub Actions when a **GitHub Release
 - **Breaking (MCP):** Single hybrid `query` tool with `preset` (`fast` | `detailed` | `full`); removed separate `query_fast` / `query_detailed` tool registrations.
 - `resolveConfig()` throws if the Pinecone API key is missing (after trim); library callers must supply `apiKey` via overrides or set `PINECONE_API_KEY`.
 - `withTimeout` aborts an internal `AbortSignal` on deadline (cooperative cancellation).
-- `PineconeClient`: shared hit-field extraction, safer merge dedup without empty `_id` collisions, metadata sampling skips zero-vector probe when dimension is unknown, `listNamespacesFromKeywordIndex` surfaces errors via `{ ok: false }`.
+- `PineconeClient`: constructor reads index name, rerank model, and default top-k only from `PineconeClientConfig` (not `process.env`); shared hit-field extraction, safer merge dedup without empty `_id` collisions, metadata sampling skips zero-vector probe when dimension is unknown, `listNamespacesFromKeywordIndex` surfaces errors via `{ ok: false }`.
+- `setupServer()` throws if called twice in one process without `teardownServer()` first; README library-embedding section documents the teardown pattern.
 - Metadata filter manual validation accepts primitive arrays for `$in`/`$nin` including numbers (matches Zod).
 - README: deployment model for process-global gate/cache/registry; adjusted feature wording vs pre-1.0 semver.
 - `.npmignore` no longer excludes `dist/` (still shipped via `package.json` `files`).
diff --git a/README.md b/README.md
index c5d846c..5b79fc2 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ When a tool fails, the MCP tool result sets **`isError: true`**. The `text` cont
 
 Success payloads are unchanged and do **not** wrap `ToolError`. Clients that still expect `{ "status": "error", "message": "..." }` must migrate to the shape above.
 
-For successful `query` / `guided_query` payloads, **rerank/hybrid fidelity** is described in [docs/TOOLS.md](docs/TOOLS.md) (row-level `reranked`, current lack of a top-level `degraded` envelope).
+For successful `query`, `query_documents`, and `guided_query` payloads, **rerank/hybrid fidelity** is described in [docs/TOOLS.md](docs/TOOLS.md#rerank-and-hybrid-degradation) (row-level `reranked`, top-level `degraded` / `degradation_reason`, and optional `hybrid_leg_failed`; `query_documents` propagates the same fields on its nested query payload when applicable).
 
 ## Features
 
@@ -106,9 +106,11 @@ The server uses **process-global** memory for the suggest-flow gate (`suggest_qu
 
 ### Library embedding (`setupServer`)
 
-Treat **`setupServer()` as one logical server per Node process**: it mutates shared module singletons (suggest-flow map, namespaces cache, URL registry, config context, shared `PineconeClient` slot). A second `setupServer()` without a coordinated teardown can leave stale or mixed state for in-flight requests — **spawn a separate process** per isolated instance until an explicit lifecycle API is documented in the changelog.
+Treat **`setupServer()` as one logical server per Node process**: it mutates shared module singletons (suggest-flow map, namespaces cache, URL registry, config context, shared `PineconeClient` slot). A **second** `setupServer()` in the same process **throws** unless you call **`teardownServer()`** first.
 
-Recommended pattern: `resolveConfig` → `setPineconeClient(new PineconeClient(...))` → `await setupServer(config)` → connect one MCP transport. See [examples/library-embedding-demo.ts](examples/library-embedding-demo.ts) and [docs/TOOLS.md](docs/TOOLS.md#suggest-flow-gate).
+Recommended pattern: `resolveConfig` → `setPineconeClient(new PineconeClient(...))` → `await setupServer(config)` → connect one MCP transport. For tests or re-initialization in the same process, call `teardownServer()` then `setupServer(config)` again. For isolated production tenants, prefer **one server per Node process** (or separate OS processes) rather than sharing one embedder across tenants.
+
+Import `setupServer` and `teardownServer` from `@will-cppa/pinecone-read-only-mcp`. See [examples/library-embedding-demo.ts](examples/library-embedding-demo.ts) and [docs/TOOLS.md](docs/TOOLS.md#suggest-flow-gate).
 
 ### Custom URL generators
 
diff --git a/docs/TOOLS.md b/docs/TOOLS.md
index 0785e3d..7cebb83 100644
--- a/docs/TOOLS.md
+++ b/docs/TOOLS.md
@@ -99,7 +99,7 @@ Tools **`query`**, **`count`**, and **`query_documents`** require a prior succes
 | `metadata_filter` | object | no | Metadata filter |
 | `fields` | string[] | no | Pinecone fields to return |
 
-**Success (`QueryResponse`):** `{ status: 'success', mode?: 'query' \| 'query_fast' \| 'query_detailed', query, namespace, metadata_filter?, result_count, results[], fields? }`.
+**Success (`QueryResponse`):** `{ status: 'success', mode?: 'query' \| 'query_fast' \| 'query_detailed', query, namespace, metadata_filter?, result_count, results[], fields?, degraded?, degradation_reason?, hybrid_leg_failed? }`.
 
 Each row: `document_id`, `paper_number` (deprecated alias), `title`, `author`, `url`, `content`, `score`, `reranked`, optional `metadata`.
 
@@ -114,9 +114,19 @@ Each row: `document_id`, `paper_number` (deprecated alias), `title`, `author`, `
 }
 ```
 
-### Rerank fallback and row-level fidelity
+### Rerank and hybrid degradation
 
-When reranking is requested but the rerank API fails, the server still returns **`status: 'success'`** with rows where `reranked: false`. Treat **`reranked: false`** as lower confidence when reranking was expected (`preset` detailed/full). Structured stderr logs include the failure; there is **no** separate top-level `degraded` flag in the current JSON envelope—client UX should combine `preset`, `use_reranking`, and per-row `reranked` (see project issue backlog for envelope-level degradation).
+When reranking is requested but the rerank API fails, the server still returns **`status: 'success'`** with rows where `reranked: false`, plus envelope fields:
+
+| Field | Value | Meaning |
+| ----- | ----- | ------- |
+| `degraded` | `true` | Rerank was attempted and failed (or another degradation path fired) |
+| `degradation_reason` | string | Human-readable detail for MCP/LLM clients (e.g. `rerank_failed: timeout after 5000ms`) |
+| `hybrid_leg_failed` | `'dense'` \| `'sparse'` \| omitted / `null` | Which hybrid search leg failed while the other still returned hits; omitted or `null` otherwise |
+
+Treat **`degraded: true`** as lower confidence even when `status` is `success`. Combine with per-row `reranked`, `preset`, and `use_reranking`. Structured stderr logs may include additional detail.
+
+`query_documents` propagates the same flags on its nested query payload when applicable.
 
 ---
 
@@ -167,7 +177,9 @@ When reranking is requested but the rerank API fails, the server still returns *
 
 **Success:** `{ status: 'success', decision_trace, result }` where `result` is either a count payload or a `QueryResponse`-shaped query payload.
 
-**`decision_trace` fields (non-exhaustive):** `cache_hit`, `input_namespace`, `routed_namespace`, `selected_namespace`, `ranked_namespaces`, `suggested_fields`, `suggested_tool`, `selected_tool`, `explanation`, `enrich_urls`.
+**`decision_trace` fields (non-exhaustive):** `cache_hit`, `input_namespace`, `routed_namespace`, `selected_namespace`, `ranked_namespaces`, `suggested_fields`, `suggested_tool`, `selected_tool`, `explanation`, `enrich_urls`, `rerank_status` (`success` \| `skipped` \| `failed`).
+
+When the inner query path runs, `result` includes the same `degraded`, `degradation_reason`, and `hybrid_leg_failed` fields as `query` (see [Rerank and hybrid degradation](#rerank-and-hybrid-degradation)).
 
 **Example:**
 
diff --git a/examples/demo-mock-pinecone-client.ts b/examples/demo-mock-pinecone-client.ts
new file mode 100644
index 0000000..9592062
--- /dev/null
+++ b/examples/demo-mock-pinecone-client.ts
@@ -0,0 +1,88 @@
+/**
+ * Mock PineconeClient for examples: no network; returns canned namespaces and hits.
+ * Namespace `mailing` matches built-in URL generator demos in the README.
+ */
+
+import {
+  PineconeClient,
+  type CountParams,
+  type CountResult,
+  type HybridQueryResult,
+  type KeywordIndexNamespacesResult,
+  type KeywordSearchParams,
+  type PineconeMetadataValue,
+  type QueryParams,
+  type SearchResult,
+} from '@will-cppa/pinecone-read-only-mcp';
+
+export const DEMO_NAMESPACE = 'mailing';
+// Canned metadata attached to the demo hit; `chunk_text` also supplies the hit's `content`.
+const demoMetadata: Record<string, PineconeMetadataValue> = {
+  document_number: 'D-100',
+  title: 'Demo document',
+  chunk_text: 'This is synthetic chunk text for the week-3 examples.',
+};
+// Base hit; `DemoMockPineconeClient.query` spreads it and overrides `reranked` and `metadata`.
+const demoHit: SearchResult = {
+  id: 'demo-hit-1',
+  content: String(demoMetadata['chunk_text']),
+  score: 0.95,
+  metadata: demoMetadata,
+  reranked: true,
+};
+
+/** Offline `PineconeClient` stand-in for the examples: every override returns fixed data. */
+export class DemoMockPineconeClient extends PineconeClient {
+  /** Hands the base constructor a placeholder API key (all-zero UUID). */
+  constructor() {
+    super({ apiKey: '00000000-0000-0000-0000-000000000000' });
+  }
+
+  /** Lists `DEMO_NAMESPACE` as the sole namespace: 42 records, four fields typed `'string'`. */
+  override async listNamespacesWithMetadata(): Promise<
+    Array<{ namespace: string; recordCount: number; metadata: Record<string, string> }>
+  > {
+    const fieldTypes: Record<string, string> = {
+      document_number: 'string',
+      title: 'string',
+      chunk_text: 'string',
+      url: 'string',
+    };
+    return [{ namespace: DEMO_NAMESPACE, recordCount: 42, metadata: fieldTypes }];
+  }
+
+  /** Keyword-index listing: always `ok`, with the same single namespace. */
+  override async listNamespacesFromKeywordIndex(): Promise<KeywordIndexNamespacesResult> {
+    const namespaces = [{ namespace: DEMO_NAMESPACE, recordCount: 42 }];
+    return { ok: true, namespaces };
+  }
+
+  /** Index check: always reports `ok` with an empty error list. */
+  override async checkIndexes(): Promise<{ ok: boolean; errors: string[] }> {
+    const errors: string[] = [];
+    return { ok: true, errors };
+  }
+
+  /**
+   * Returns a single copy of the demo hit, never flagged as degraded.
+   * `reranked` is true unless the caller passed `useReranking: false`.
+   */
+  override async query(params: QueryParams): Promise<HybridQueryResult> {
+    const hit: SearchResult = {
+      ...demoHit,
+      metadata: { ...demoMetadata }, // shallow copy, not the shared fixture object
+      reranked: params.useReranking !== false,
+    };
+    return { results: [hit], degraded: false, hybrid_leg_failed: null };
+  }
+
+  /** Ignores its parameters and reports a fixed, untruncated count of 7. */
+  override async count(_params: CountParams): Promise<CountResult> {
+    return { count: 7, truncated: false };
+  }
+
+  /** Ignores its parameters; keyword search yields no hits in this mock. */
+  override async keywordSearch(_params: KeywordSearchParams): Promise<SearchResult[]> {
+    return [];
+  }
+}
diff --git a/examples/library-embedding-demo.ts b/examples/library-embedding-demo.ts
index cfc777a..1c60edc 100644
--- a/examples/library-embedding-demo.ts
+++ b/examples/library-embedding-demo.ts
@@ -8,10 +8,8 @@
  *
  * **Single process:** `setupServer` registers tools against process-global
  * singletons (suggest-flow state, namespaces cache, URL registry, active config).
- * Do **not** call `setupServer` twice in one process for isolated tenants unless
- * you accept shared state — prefer **one server per Node process** or external
- * process isolation. (A future release may add an explicit teardown API; see
- * CHANGELOG when available.)
+ * A second `setupServer` throws — call `teardownServer()` first to re-initialize
+ * (e.g. in tests). For isolated tenants in production, prefer one server per Node process.
  */
 
 import {
diff --git a/examples/mcp-linked-transport.ts b/examples/mcp-linked-transport.ts
new file mode 100644
index 0000000..91d42f8
--- /dev/null
+++ b/examples/mcp-linked-transport.ts
@@ -0,0 +1,53 @@
+/**
+ * Minimal in-memory MCP transport pair for examples (no subprocess / stdio).
+ * Each `send` delivers the JSON-RPC message to the peer's `onmessage` on a microtask.
+ */
+
+import type { JSONRPCMessage } from '@modelcontextprotocol/sdk/types.js';
+import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
+
+/**
+ * Builds two in-memory transports wired back to back: whatever one end sends is handed
+ * to the other end's `onmessage`. Delivery is deferred with `queueMicrotask` rather than
+ * made inside `send`; a message still queued when the pair is closed is dropped.
+ * Closing either end closes both; each `onclose` (when set) runs only on the first close.
+ * `start` is a no-op and nothing here invokes `onerror`: an exception thrown by an
+ * `onmessage` handler surfaces from the microtask instead.
+ *
+ * @returns The linked `clientTransport` / `serverTransport` pair.
+ */
+export function createLinkedTransports(): {
+  clientTransport: Transport;
+  serverTransport: Transport;
+} {
+  // Shared by both ends so the pair shuts down as a unit.
+  let closed = false;
+
+  // One factory for both ends; `peer` is a thunk because the other end is created later.
+  const makeEnd = (peer: () => Transport): Transport => ({
+    onmessage: undefined,
+    onclose: undefined,
+    onerror: undefined,
+    // Nothing to connect: the peer is reachable as soon as the pair exists.
+    async start() {},
+    async send(message: JSONRPCMessage) {
+      queueMicrotask(() => {
+        // Re-checked at delivery time: the pair may have closed after `send` returned.
+        if (closed) return;
+        peer().onmessage?.(message);
+      });
+    },
+    // Idempotent: only the first close on either end notifies the handlers.
+    async close() {
+      if (closed) return;
+      closed = true;
+      clientTransport.onclose?.();
+      serverTransport.onclose?.();
+    },
+  });
+
+  const clientTransport: Transport = makeEnd(() => serverTransport);
+  const serverTransport: Transport = makeEnd(() => clientTransport);
+
+  return { clientTransport, serverTransport };
+}
diff --git a/examples/tsconfig.json b/examples/tsconfig.json
new file mode 100644
index 0000000..0c20ccd
--- /dev/null
+++ b/examples/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "Node16",
+    "moduleResolution": "Node16",
+    "strict": true,
+    "noEmit": true,
+    "skipLibCheck": true,
+    "baseUrl": ".",
+    "paths": {
+      "@will-cppa/pinecone-read-only-mcp": ["../dist/server.js"]
+    }
+  },
+  "include": ["./**/*.ts"]
+}