Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
# REDIS_TTL_MODELS=86400
# REDIS_KEY_RESPONSES=gomodel:response:
# REDIS_TTL_RESPONSES=3600
# Opt-in when config.yaml has no cache.response.simple block (e.g. env-only deploys). Omit otherwise.
# RESPONSE_CACHE_SIMPLE_ENABLED=true

# Optional: Custom cache directory for local file cache
# GOMODEL_CACHE_DIR=.cache
Expand Down
27 changes: 27 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,35 @@ permissions:
contents: write # Required to create releases

jobs:
semantic-cache-e2e:
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout
uses: actions/checkout@v6

- name: Set up Go
uses: actions/setup-go@v6
with:
go-version: 1.26.1
cache: true

- name: Run semantic cache release scenarios
env:
OUTPUT_DIR: ${{ runner.temp }}/release-semantic-cache
run: tests/e2e/run-release-semantic-cache.sh

- name: Upload semantic cache logs
if: always()
uses: actions/upload-artifact@v4
with:
name: semantic-cache-release-e2e
path: ${{ runner.temp }}/release-semantic-cache

goreleaser:
runs-on: ubuntu-latest
needs: semantic-cache-e2e
steps:
- name: Checkout
uses: actions/checkout@v6
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ jobs:
steps:
- uses: actions/checkout@v6

- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libsecret-1-dev

- name: Setup Node.js
uses: actions/setup-node@v6
with:
Expand Down
4 changes: 3 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ make tidy # go mod tidy
make clean # Remove bin/
make record-api # Record API responses for contract tests
make swagger # Regenerate Swagger docs
make infra # Docker Compose: Redis, Postgres, MongoDB, Adminer only
make image # Docker Compose: full stack (GOModel + Prometheus)
```

**Single test:** `go test ./internal/providers -v -run TestName`
Expand Down Expand Up @@ -107,7 +109,7 @@ Full reference: `.env.template` and `config/config.yaml`
- **Storage:** `STORAGE_TYPE` (sqlite), `SQLITE_PATH` (data/gomodel.db), `POSTGRES_URL`, `MONGODB_URL`
- **Audit logging:** `LOGGING_ENABLED` (false), `LOGGING_LOG_BODIES` (false), `LOGGING_LOG_HEADERS` (false), `LOGGING_RETENTION_DAYS` (30)
- **Usage tracking:** `USAGE_ENABLED` (true), `ENFORCE_RETURNING_USAGE_DATA` (true), `USAGE_RETENTION_DAYS` (90)
- **Cache:** `CACHE_TYPE` (local), `CACHE_REFRESH_INTERVAL` (3600s), `REDIS_URL`, `REDIS_KEY_MODELS`, `REDIS_TTL_MODELS`, `REDIS_KEY_RESPONSES`, `REDIS_TTL_RESPONSES`
- **Cache:** `CACHE_TYPE` (local), `CACHE_REFRESH_INTERVAL` (3600s), `REDIS_URL`, `REDIS_KEY_MODELS`, `REDIS_TTL_MODELS`. Exact response cache uses `cache.response.simple` in `config.yaml` (optional `enabled`); `REDIS_KEY_RESPONSES`, `REDIS_TTL_RESPONSES`, and `REDIS_URL` apply only when that block exists or when `RESPONSE_CACHE_SIMPLE_ENABLED=true`. Semantic response cache uses `cache.response.semantic` (optional `enabled`); when enabled, `embedder.provider` must name a key in the top-level `providers` map (no default embedder). At runtime that key is resolved against the same env-merged, credential-filtered provider set as routing (not YAML-only), so env-only credentials apply. `vector_store.type` must be set explicitly to one of `qdrant`, `pgvector`, `pinecone`, `weaviate` (each has its own nested config and `SEMANTIC_CACHE_*` env vars). Tuning via `SEMANTIC_CACHE_*` applies when the semantic block exists or `SEMANTIC_CACHE_ENABLED=true`.
- **HTTP client:** `HTTP_TIMEOUT` (600s), `HTTP_RESPONSE_HEADER_TIMEOUT` (600s)
- **Resilience:** Configured via `config/config.yaml` — global `resilience.retry.*` and `resilience.circuit_breaker.*` defaults with optional per-provider overrides under `providers.<name>.resilience.retry.*` and `providers.<name>.resilience.circuit_breaker.*`. Retry defaults: `max_retries` (3), `initial_backoff` (1s), `max_backoff` (30s), `backoff_factor` (2.0), `jitter_factor` (0.1). Circuit breaker defaults: `failure_threshold` (5), `success_threshold` (2), `timeout` (30s)
- **Metrics:** `METRICS_ENABLED` (false), `METRICS_ENDPOINT` (/metrics)
Expand Down
12 changes: 10 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: all build run clean tidy test test-e2e test-integration test-contract test-all lint lint-fix record-api swagger install-tools perf-check perf-bench
.PHONY: all build run clean tidy test test-e2e test-integration test-contract test-all lint lint-fix record-api swagger install-tools perf-check perf-bench infra image

all: build

Expand All @@ -13,7 +13,7 @@ LDFLAGS := -X "gomodel/internal/version.Version=$(VERSION)" \
-X "gomodel/internal/version.Date=$(DATE)"

install-tools:
@command -v golangci-lint > /dev/null 2>&1 || (echo "Installing golangci-lint..." && go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.10.1)
@command -v golangci-lint > /dev/null 2>&1 || (echo "Installing golangci-lint..." && go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.4)
@command -v pre-commit > /dev/null 2>&1 || (echo "Installing pre-commit..." && pip install pre-commit==4.5.1)
@echo "All tools are ready"

Expand All @@ -31,6 +31,14 @@ clean:
tidy:
go mod tidy

# Docker Compose: Redis, PostgreSQL, MongoDB, Adminer (no app image build)
infra:
docker compose up -d

# Docker Compose: full stack (GOModel + Prometheus; builds app image when needed)
image:
docker compose --profile app up -d

# Run unit tests only
test:
go test ./internal/... ./config/... -v
Expand Down
22 changes: 15 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,22 @@ Example model identifiers are illustrative and subject to change; consult provid
make run
```

### Docker Compose (Full Stack)
### Docker Compose

Includes GOModel + Redis + PostgreSQL + MongoDB + Adminer + Prometheus:
**Infrastructure only** (Redis, PostgreSQL, MongoDB, Adminer — no image build):

```bash
docker compose up -d
# or: make infra
```

**Full stack** (adds GOModel + Prometheus; builds the app image):

```bash
cp .env.template .env
# Add your API keys to .env
docker compose up -d
docker compose --profile app up -d
# or: make image
```

| Service | URL |
Expand Down Expand Up @@ -204,15 +212,15 @@ Or via environment variables: `REDIS_URL`, `REDIS_KEY_RESPONSES`, `REDIS_TTL_RES

Responses served from this layer carry `X-Cache: HIT (exact)`.

### Layer 2 — Semantic cache *(coming soon)*
### Layer 2 — Semantic cache

Embeds the last user message with `all-MiniLM-L6-v2` (local ONNX, zero external dependency) and performs a KNN vector search. Semantically equivalent queries — e.g. *"What's the capital of France?"* vs *"Which city is France's capital?"* — return the same cached response without an upstream LLM call.
Embeds the last user message via your configured provider’s OpenAI-compatible `/v1/embeddings` API (`cache.response.semantic.embedder.provider` must name a key in the top-level `providers` map) and performs a KNN vector search. Semantically equivalent queries — e.g. *"What's the capital of France?"* vs *"Which city is France's capital?"* — can return the same cached response without an upstream LLM call.

Expected hit rates: ~60–70% in high-repetition workloads vs. ~18% for exact-match alone.

Responses served from this layer carry `X-Cache: HIT (semantic)`.

Supported vector backends: `sqlite-vec` (default, embedded), `pgvector`, `qdrant`.
Supported vector backends: `qdrant`, `pgvector`, `pinecone`, `weaviate` (set `cache.response.semantic.vector_store.type` and the matching nested block).

Both cache layers run **after** guardrail/execution-plan patching so they always see the final prompt. Use `Cache-Control: no-cache` or `Cache-Control: no-store` to bypass caching per-request.

Expand All @@ -234,12 +242,12 @@ See [DEVELOPMENT.md](DEVELOPMENT.md) for testing, linting, and pre-commit setup.
| Administrative endpoints | ✅ | Admin API and dashboard ship with usage, audit, and model views. |
| Guardrails | ✅ | The guardrails pipeline is implemented and can be enabled from config. |
| System prompt guardrails | ✅ | `inject`, `override`, and `decorator` modes are supported. |
| Semantic response cache | ✅ | Exact-match Redis plus optional semantic layer (API embeddings, `qdrant` / `pgvector` / `pinecone` / `weaviate`) — see [ADR-0006](docs/adr/0006-semantic-response-cache.md). |

## In Progress

| Area | Status | Notes |
| ---- | :----: | ----- |
| Semantic response cache | 🚧 | Exact-match Redis cache is live. Semantic (vector KNN) layer with local `all-MiniLM-L6-v2` embedder is in progress — see [ADR-0006](docs/adr/0006-semantic-response-cache.md). |
| Billing management | 🚧 | Usage and pricing primitives exist, but billing workflows are not complete. |
| Budget management | 🚧 | Gateway-level budget enforcement and policy controls are not implemented yet. |
| Guardrails depth | 🚧 | The system prompt guardrail is available today; broader guardrail types are still to come. |
Expand Down
124 changes: 107 additions & 17 deletions config/cache_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,17 @@ func TestValidateCacheConfig_RedisOnly(t *testing.T) {
}
}

// boolPtr returns a pointer to a copy of b, for populating optional
// *bool config fields (e.g. SemanticCacheConfig.Enabled) in tests.
func boolPtr(b bool) *bool {
	v := b
	return &v
}

func TestValidateCacheConfig_SemanticDisabledIgnoresInvalidVectorStore(t *testing.T) {
cfg := &CacheConfig{
Model: ModelCacheConfig{
Local: &LocalCacheConfig{CacheDir: ".cache"},
Redis: nil,
},
Response: ResponseCacheConfig{
Semantic: SemanticCacheConfig{
Enabled: false,
Semantic: &SemanticCacheConfig{
Enabled: boolPtr(false),
VectorStore: VectorStoreConfig{
Type: "qdrant",
// Intentionally missing URL — valid because semantic cache is off.
Expand All @@ -107,10 +109,11 @@ func TestValidateCacheConfig_SemanticEnabledRequiresQdrantURL(t *testing.T) {
Redis: nil,
},
Response: ResponseCacheConfig{
Semantic: SemanticCacheConfig{
Enabled: true,
Semantic: &SemanticCacheConfig{
Enabled: boolPtr(true),
SimilarityThreshold: 0.9,
TTL: 3600,
TTL: intPtr(3600),
Embedder: EmbedderConfig{Provider: "openai"},
VectorStore: VectorStoreConfig{
Type: "qdrant",
},
Expand All @@ -122,20 +125,47 @@ func TestValidateCacheConfig_SemanticEnabledRequiresQdrantURL(t *testing.T) {
}
}

func TestValidateCacheConfig_SemanticEnabledRequiresQdrantCollection(t *testing.T) {
cfg := &CacheConfig{
Model: ModelCacheConfig{
Local: &LocalCacheConfig{CacheDir: ".cache"},
Redis: nil,
},
Response: ResponseCacheConfig{
Semantic: &SemanticCacheConfig{
Enabled: boolPtr(true),
SimilarityThreshold: 0.9,
TTL: intPtr(3600),
Embedder: EmbedderConfig{Provider: "openai"},
VectorStore: VectorStoreConfig{
Type: "qdrant",
Qdrant: QdrantConfig{URL: "http://localhost:6333"},
},
},
},
}
if err := ValidateCacheConfig(cfg); err == nil {
t.Fatal("expected error when qdrant collection empty")
}
}

func TestValidateCacheConfig_SemanticSimilarityThresholdInvalid(t *testing.T) {
base := CacheConfig{
Model: ModelCacheConfig{
Local: &LocalCacheConfig{CacheDir: ".cache"},
Redis: nil,
},
Response: ResponseCacheConfig{
Semantic: SemanticCacheConfig{
Enabled: true,
TTL: 3600,
Semantic: &SemanticCacheConfig{
Enabled: boolPtr(true),
TTL: intPtr(3600),
Embedder: EmbedderConfig{Provider: "openai"},
VectorStore: VectorStoreConfig{
Type: "sqlite-vec",
SQLiteVec: SQLiteVecConfig{
Path: ".cache/semantic.db",
Type: "pgvector",
PGVector: PGVectorConfig{
URL: "postgres://localhost/test",
Table: "gomodel_semantic_cache",
Dimension: 1536,
},
},
},
Expand Down Expand Up @@ -165,21 +195,81 @@ func TestValidateCacheConfig_SemanticSimilarityThresholdInvalid(t *testing.T) {
}
}

func TestValidateCacheConfig_SemanticRequiresEmbedderProvider(t *testing.T) {
cfg := &CacheConfig{
Model: ModelCacheConfig{
Local: &LocalCacheConfig{CacheDir: ".cache"},
Redis: nil,
},
Response: ResponseCacheConfig{
Semantic: &SemanticCacheConfig{
Enabled: boolPtr(true),
SimilarityThreshold: 0.9,
TTL: intPtr(3600),
VectorStore: VectorStoreConfig{
Type: "pgvector",
PGVector: PGVectorConfig{
URL: "postgres://localhost/test",
Dimension: 768,
},
},
},
},
}
err := ValidateCacheConfig(cfg)
if err == nil {
t.Fatal("expected error when semantic enabled without embedder provider")
}
if !strings.Contains(err.Error(), "embedder.provider") {
t.Fatalf("unexpected error: %v", err)
}
}

func TestValidateCacheConfig_SemanticRejectsLocalEmbedder(t *testing.T) {
cfg := &CacheConfig{
Model: ModelCacheConfig{
Local: &LocalCacheConfig{CacheDir: ".cache"},
Redis: nil,
},
Response: ResponseCacheConfig{
Semantic: &SemanticCacheConfig{
Enabled: boolPtr(true),
SimilarityThreshold: 0.9,
TTL: intPtr(3600),
Embedder: EmbedderConfig{Provider: "local"},
VectorStore: VectorStoreConfig{
Type: "pgvector",
PGVector: PGVectorConfig{
URL: "postgres://localhost/test",
Dimension: 768,
},
},
},
},
}
err := ValidateCacheConfig(cfg)
if err == nil {
t.Fatal("expected error for local embedder provider")
}
}

func TestValidateCacheConfig_SemanticNegativeTTL(t *testing.T) {
cfg := &CacheConfig{
Model: ModelCacheConfig{
Local: &LocalCacheConfig{CacheDir: ".cache"},
Redis: nil,
},
Response: ResponseCacheConfig{
Semantic: SemanticCacheConfig{
Enabled: true,
Semantic: &SemanticCacheConfig{
Enabled: boolPtr(true),
SimilarityThreshold: 0.9,
TTL: -1,
TTL: intPtr(-1),
Embedder: EmbedderConfig{Provider: "openai"},
VectorStore: VectorStoreConfig{
Type: "sqlite-vec",
SQLiteVec: SQLiteVecConfig{
Path: ".cache/semantic.db",
Type: "pgvector",
PGVector: PGVectorConfig{
URL: "postgres://localhost/test",
Dimension: 768,
},
},
},
Expand Down
31 changes: 31 additions & 0 deletions config/config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,37 @@ cache:
# url: "redis://localhost:6379"
# key: "gomodel:models"
# ttl: 86400 # 24 hours in seconds
# response:
# simple: # omit the whole `simple` key to disable exact-match caching (unless RESPONSE_CACHE_SIMPLE_ENABLED=true)
# enabled: true # default when `simple` is present; set false to disable while keeping the block
# redis:
# url: "redis://localhost:6379"
# key: "gomodel:response:"
# ttl: 3600
# semantic: # omit the whole `semantic` key to disable semantic caching (unless SEMANTIC_CACHE_ENABLED=true)
# enabled: true
# embedder:
# provider: openai # must match a key under `providers` (openai, gemini, groq, …)
# model: text-embedding-3-small # optional; provider-specific defaults apply if omitted
# vector_store:
# type: qdrant # required: qdrant | pgvector | pinecone | weaviate (pick one block below)
# qdrant:
# url: "http://localhost:6333"
# collection: "gomodel_semantic"
# api_key: "" # optional for local Qdrant
# # pgvector:
# # url: "postgres://user:pass@localhost:5432/gomodel"
# # table: "gomodel_semantic_cache" # optional; default if omitted
# # dimension: 1536 # must match embedding model output
# # pinecone:
# # host: "https://your-index.svc.region.pinecone.io" # data-plane host
# # api_key: "..."
# # namespace: "" # optional
# # dimension: 1536 # must match your Pinecone index
# # weaviate:
# # url: "http://localhost:8080"
# # class: "GomodelSemanticCache" # PascalCase recommended (GraphQL)
# # api_key: "" # optional

storage:
type: "sqlite" # "sqlite", "postgresql", or "mongodb"
Expand Down
Loading