
Commit 164b132

Authored and committed by StackMemory Bot (CLI)
feat: multi-provider routing with sensitive content guard
Add complexity-based routing to distribute tasks across providers (Anthropic, Cerebras, DeepInfra, OpenRouter) based on task difficulty. Sensitive content (API keys, secrets, PII) is automatically detected and restricted to approved providers only.

- Add sensitive-guard: pre-routing check blocks secrets from third-party LLMs
- Add complexity-scorer: 0-1 scoring with regression test corpus (26 pinned prompts)
- Add provider-pricing: cost table for routing decisions
- Add Anthropic Batch API client with 50% discount async processing
- Add Cerebras + DeepInfra adapter stubs (OpenAI-compatible)
- Add MCP provider handlers (delegate_to_model, list_providers, provider_status)
- Refactor provider-adapter: remove unused Gemini types, deduplicate headers
- Fix: preference path no longer bypasses sensitive guard in model-router
- Fix: timer leak in batch-client waitForCompletion (unref)
- Bump version to 1.2.0
1 parent d42be65 commit 164b132

29 files changed

Lines changed: 4413 additions & 470 deletions
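
The commit message describes two routing stages: a sensitive-content guard that runs before provider selection, and a 0-1 complexity score that picks a provider for everything else. The sketch below illustrates that flow only; every name in it (containsSensitiveContent, scoreComplexity, the tier cutoffs) is an assumption inferred from the module names in the message, not code from this commit.

// A minimal, hypothetical sketch of the flow described in the commit message:
// a pre-routing sensitive-content guard, then complexity-based provider choice.
// All names and thresholds below are illustrative assumptions.

type ProviderId = 'anthropic' | 'cerebras' | 'deepinfra' | 'openrouter';

function routeTask(prompt: string): ProviderId {
  if (containsSensitiveContent(prompt)) {
    // Sensitive guard: secrets/PII never leave the approved provider set.
    return 'anthropic';
  }
  const score = scoreComplexity(prompt); // 0 (trivial) .. 1 (hard)
  if (score < 0.3) return 'cerebras';   // cheap, fast models
  if (score < 0.7) return 'deepinfra';  // mid-tier
  return 'anthropic';                   // hardest tasks
}

function containsSensitiveContent(prompt: string): boolean {
  // Placeholder detection: API keys, private key blocks, obvious PII markers.
  return /(api[_-]?key|BEGIN [A-Z ]*PRIVATE KEY|\bssn\b)/i.test(prompt);
}

function scoreComplexity(prompt: string): number {
  // Placeholder heuristic: longer prompts score higher, capped at 1.
  return Math.min(prompt.length / 2000, 1);
}
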

package.json

Lines changed: 5 additions & 1 deletion
@@ -1,6 +1,6 @@
 {
   "name": "@stackmemoryai/stackmemory",
-  "version": "1.1.2",
+  "version": "1.2.0",
   "description": "Project-scoped memory for AI coding tools. Durable context across sessions with MCP integration, frames, smart retrieval, Claude Code skills, and automatic hooks.",
   "engines": {
     "node": ">=20.0.0",
@@ -93,6 +93,10 @@
     "format": "prettier --write src/**/*.ts scripts/**/*.ts",
     "test": "vitest",
     "test:ui": "vitest --ui",
+    "test:unit": "vitest run --project unit",
+    "test:integration": "vitest run --project integration",
+    "test:live": "vitest run --project live",
+    "test:all": "vitest run",
     "test:run": "vitest run",
     "test:pre-publish": "./scripts/test-pre-publish-quick.sh",
     "test:pre-commit": "vitest related --run --reporter=dot --silent --bail=1",

scripts/test-pre-publish-quick.sh

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ log_success "Package structure valid"

 # Core tests + search benchmark (100-frame smoke)
 log_info "Running tests..."
-npx vitest run --reporter=dot --bail=3 --retry 1 2>&1 | tail -5
+npm run test:all -- --reporter=dot --bail=3 --retry 1 2>&1 | tail -5
 if [ ${PIPESTATUS[0]} -ne 0 ]; then
   log_error "Tests failed"
 fi

src/core/config/feature-flags.ts

Lines changed: 10 additions & 0 deletions
@@ -15,6 +15,7 @@ export interface FeatureFlags {
   aiSummaries: boolean;
   skills: boolean;
   ralph: boolean;
+  multiProvider: boolean;
 }

 /**
@@ -62,6 +63,11 @@ export function isFeatureEnabled(feature: keyof FeatureFlags): boolean {
       // Ralph enabled by default in development (unless explicitly disabled)
       // For npm package users, must be explicitly enabled
       return process.env['STACKMEMORY_RALPH'] !== 'false';
+    case 'multiProvider':
+      return (
+        process.env['STACKMEMORY_MULTI_PROVIDER'] === 'true' ||
+        process.env['STACKMEMORY_MULTI_PROVIDER'] === '1'
+      );
     default:
       return false;
   }
@@ -78,6 +84,7 @@ export function getFeatureFlags(): FeatureFlags {
     aiSummaries: isFeatureEnabled('aiSummaries'),
     skills: isFeatureEnabled('skills'),
     ralph: isFeatureEnabled('ralph'),
+    multiProvider: isFeatureEnabled('multiProvider'),
   };
 }

@@ -105,5 +112,8 @@ export function logFeatureStatus(): void {
   console.log(
     ` Ralph: ${flags.ralph ? 'enabled' : 'disabled (set STACKMEMORY_RALPH=true)'}`
   );
+  console.log(
+    ` MultiProvider: ${flags.multiProvider ? 'enabled' : 'disabled (set STACKMEMORY_MULTI_PROVIDER=true)'}`
+  );
 }
 }
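
The flag stays off unless STACKMEMORY_MULTI_PROVIDER is set to 'true' or '1'. A small, hypothetical usage of the exports added above:

// Hypothetical usage of the new multiProvider flag. isFeatureEnabled and
// getFeatureFlags are added in the diff above; everything else is illustrative.
import { getFeatureFlags, isFeatureEnabled } from './feature-flags.js';

// Enable for the current process (normally set in the shell or a .env file):
process.env['STACKMEMORY_MULTI_PROVIDER'] = '1';

console.log(isFeatureEnabled('multiProvider')); // true
console.log(getFeatureFlags().multiProvider);   // true
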
Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { CerebrasAdapter } from '../cerebras-adapter.js';

describe('CerebrasAdapter', () => {
  let adapter: CerebrasAdapter;

  beforeEach(() => {
    adapter = new CerebrasAdapter({
      apiKey: 'test-key',
      baseUrl: 'https://api.cerebras.ai/v1',
    });
  });

  it('should have correct id and name', () => {
    expect(adapter.id).toBe('cerebras');
    expect(adapter.name).toBe('Cerebras');
  });

  it('should not support any provider extensions', () => {
    expect(adapter.supportsExtension('claude')).toBe(false);
    expect(adapter.supportsExtension('gpt')).toBe(false);
    expect(adapter.supportsExtension('gemini')).toBe(false);
  });

  it('should list Cerebras models', async () => {
    const models = await adapter.listModels();
    expect(models).toContain('llama-4-scout-17b-16e-instruct');
    expect(models.length).toBeGreaterThan(0);
  });

  it('should use Cerebras base URL by default', () => {
    const defaultAdapter = new CerebrasAdapter({ apiKey: 'key' });
    // Validate by attempting a completion with a mock — the URL is set internally
    expect(defaultAdapter).toBeDefined();
  });

  it('should format request as OpenAI-compatible', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(
        JSON.stringify({
          choices: [{ message: { content: 'hello' }, finish_reason: 'stop' }],
          usage: { prompt_tokens: 10, completion_tokens: 5 },
        }),
        { status: 200 }
      )
    );

    const result = await adapter.complete([{ role: 'user', content: 'test' }], {
      model: 'llama-4-scout-17b-16e-instruct',
      maxTokens: 100,
    });

    expect(result.content[0]).toMatchObject({ type: 'text', text: 'hello' });
    expect(result.usage.inputTokens).toBe(10);
    expect(result.usage.outputTokens).toBe(5);

    const [url, opts] = fetchSpy.mock.calls[0];
    expect(url).toContain('cerebras.ai');
    expect((opts as any).headers?.Authorization).toBe('Bearer test-key');

    fetchSpy.mockRestore();
  });

  it('should throw on API error during complete', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(
        new Response('Unauthorized', {
          status: 401,
          statusText: 'Unauthorized',
        })
      );

    await expect(
      adapter.complete([{ role: 'user', content: 'test' }], {
        model: 'llama3.1-8b',
        maxTokens: 100,
      })
    ).rejects.toThrow('GPT API error: 401');

    fetchSpy.mockRestore();
  });
});
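
For orientation, a hypothetical non-test usage of the adapter surface exercised above (constructor options, listModels, complete). The import path, the CEREBRAS_API_KEY variable name, and the prompt are illustrative assumptions, not part of this commit.

// Hypothetical usage sketch of CerebrasAdapter outside the test harness.
import { CerebrasAdapter } from './cerebras-adapter.js';

const adapter = new CerebrasAdapter({ apiKey: process.env.CEREBRAS_API_KEY ?? '' });
const models = await adapter.listModels();
const result = await adapter.complete(
  [{ role: 'user', content: 'Reply with one word: ready' }],
  { model: models[0], maxTokens: 32 }
);
console.log(result.content[0]); // expected shape: { type: 'text', text: '...' }
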
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { DeepInfraAdapter } from '../deepinfra-adapter.js';

describe('DeepInfraAdapter', () => {
  let adapter: DeepInfraAdapter;

  beforeEach(() => {
    adapter = new DeepInfraAdapter({
      apiKey: 'test-key',
      baseUrl: 'https://api.deepinfra.com/v1/openai',
    });
  });

  it('should have correct id and name', () => {
    expect(adapter.id).toBe('deepinfra');
    expect(adapter.name).toBe('DeepInfra');
  });

  it('should not support any provider extensions', () => {
    expect(adapter.supportsExtension('claude')).toBe(false);
    expect(adapter.supportsExtension('gpt')).toBe(false);
  });

  it('should list DeepInfra models', async () => {
    const models = await adapter.listModels();
    expect(models).toContain('THUDM/glm-4-9b-chat');
    expect(models.length).toBeGreaterThan(0);
  });

  it('should use DeepInfra base URL by default', () => {
    const defaultAdapter = new DeepInfraAdapter({ apiKey: 'key' });
    expect(defaultAdapter).toBeDefined();
  });

  it('should format request correctly for DeepInfra endpoint', async () => {
    const fetchSpy = vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce(
      new Response(
        JSON.stringify({
          choices: [
            {
              message: { content: 'response from glm' },
              finish_reason: 'stop',
            },
          ],
          usage: { prompt_tokens: 20, completion_tokens: 15 },
        }),
        { status: 200 }
      )
    );

    const result = await adapter.complete(
      [{ role: 'user', content: 'hello' }],
      { model: 'THUDM/glm-4-9b-chat', maxTokens: 256 }
    );

    expect(result.content[0]).toMatchObject({
      type: 'text',
      text: 'response from glm',
    });
    expect(result.usage).toEqual({ inputTokens: 20, outputTokens: 15 });

    const [url] = fetchSpy.mock.calls[0];
    expect(url).toContain('deepinfra.com');

    fetchSpy.mockRestore();
  });

  it('should throw on API error during complete', async () => {
    const fetchSpy = vi
      .spyOn(globalThis, 'fetch')
      .mockResolvedValueOnce(
        new Response('rate limited', {
          status: 429,
          statusText: 'Too Many Requests',
        })
      );

    await expect(
      adapter.complete([{ role: 'user', content: 'test' }], {
        model: 'THUDM/glm-4-9b-chat',
        maxTokens: 100,
      })
    ).rejects.toThrow('GPT API error: 429');

    fetchSpy.mockRestore();
  });
});
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
/**
 * Live integration test for OpenRouter via GPTAdapter.
 * Skipped when OPENROUTER_API_KEY is not set.
 */
import { describe, it, expect } from 'vitest';
import { GPTAdapter } from '../provider-adapter.js';

const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY;
const MODEL = 'meta-llama/llama-4-scout';

describe.skipIf(!OPENROUTER_API_KEY)('OpenRouter GPTAdapter (live)', () => {
  let adapter: GPTAdapter;

  adapter = new GPTAdapter({
    apiKey: OPENROUTER_API_KEY!,
    baseUrl: 'https://openrouter.ai/api',
  });

  it('should complete a prompt and return text content', async () => {
    const result = await adapter.complete(
      [{ role: 'user', content: 'Reply with exactly: hello world' }],
      { model: MODEL, maxTokens: 64, temperature: 0 }
    );

    const text = result.content
      .filter((c) => c.type === 'text')
      .map((c) => (c as { type: 'text'; text: string }).text)
      .join('');

    expect(text.length).toBeGreaterThan(0);
    expect(result.stopReason).toBeDefined();
  }, 30_000);

  it('should return usage with inputTokens > 0', async () => {
    const result = await adapter.complete(
      [{ role: 'user', content: 'Say hi' }],
      { model: MODEL, maxTokens: 16, temperature: 0 }
    );

    expect(result.usage.inputTokens).toBeGreaterThan(0);
    expect(result.usage.outputTokens).toBeGreaterThan(0);
  }, 30_000);

  it('should validate connection successfully', async () => {
    const ok = await adapter.validateConnection();
    expect(ok).toBe(true);
  }, 30_000);
});

0 commit comments
