ENTERPILOT
diff --git a/‎.env.template‎
Lines changed: 5 additions & 6 deletions b/‎.env.template‎
Lines changed: 5 additions & 6 deletions
diff --git a/‎2026-03-16_ARCHITECTURE_SNAPSHOT.md‎
Lines changed: 6 additions & 6 deletions b/‎2026-03-16_ARCHITECTURE_SNAPSHOT.md‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎AGENTS.md‎
Lines changed: 2 additions & 2 deletions b/‎AGENTS.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 3 additions & 3 deletions b/‎CLAUDE.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎GETTING_STARTED.md‎
Lines changed: 26 additions & 32 deletions b/‎GETTING_STARTED.md‎
Lines changed: 26 additions & 32 deletions
diff --git a/‎METRICS_CONFIGURATION.md‎
Lines changed: 2 additions & 3 deletions b/‎METRICS_CONFIGURATION.md‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎Makefile‎
Lines changed: 16 additions & 3 deletions b/‎Makefile‎
Lines changed: 16 additions & 3 deletions
@@ -99,13 +99,12 @@
 # MODEL_LIST_URL=https://raw.githubusercontent.com/ENTERPILOT/ai-model-list/refs/heads/main/models.min.json
 
 # Model Access Configuration
-# Process-wide default for concrete provider models when no persisted override exists and model overrides are enabled (default: true)
+# Process-wide default for provider models when no persisted override exists (default: true)
 # Set to false to keep models unavailable until a model override allows one or more user paths.
 # MODELS_ENABLED_BY_DEFAULT=true
-# Persisted model overrides are opt-in (default: false).
-# Set true to load/enforce model overrides and enable dashboard editing.
-# MODEL_OVERRIDES_ENABLED=false
-# Hide concrete provider models from GET /v1/models and expose only enabled aliases (default: false).
+# Enable/disable persisted model overrides and dashboard editing (default: true).
+# MODEL_OVERRIDES_ENABLED=true
+# Hide provider models from GET /v1/models and expose only enabled aliases (default: false).
 # KEEP_ONLY_ALIASES_AT_MODELS_ENDPOINT=false
 
 # Fallback & Workflow Configuration
@@ -259,7 +258,7 @@
 # OPENROUTER_API_KEY=sk-or-...
 # OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 # OPENROUTER_SITE_URL=https://gomodel.enterpilot.io
-# OPENROUTER_APP_NAME=GOModel
+# OPENROUTER_APP_NAME=GoModel
 
 # Azure OpenAI
 # AZURE_API_KEY=...
 
@@ -1,4 +1,4 @@
-# GOModel Architecture Snapshot
+# GoModel Architecture Snapshot
 
 This document is a point-in-time architecture snapshot based on the code and runtime wiring present on March 16, 2026.
 
@@ -153,11 +153,11 @@ Echo + Handler"]
 
 ## 2. Request-Scoped Data Objects
 
-| Object | Created by | Contains | Consumed by |
-| --- | --- | --- | --- |
-| `RequestSnapshot` | `RequestSnapshotCapture()` | Immutable ingress transport data: method, path, route params, query params, headers, content type, captured body bytes, `BodyNotCaptured`, request id, trace metadata | `DeriveWhiteBoxPrompt`, audit logging, passthrough semantic enrichers, any later logic that needs raw ingress fidelity |
-| `WhiteBoxPrompt` | `core.DeriveWhiteBoxPrompt(snapshot)` | Best-effort semantics: route type, operation type, route hints, stream intent, JSON parsed flag, cached typed request objects, cached route metadata | workflow resolution, canonical request decoding, passthrough/file/batch helpers |
-| `Workflow` | `WorkflowResolutionWithResolver(...)` or `ensureTranslatedRequestWorkflow(...)` | Control-plane decision: endpoint descriptor, execution mode, capabilities, provider type, resolved model selector, passthrough info | response cache, translated handlers, passthrough handlers, audit-log enrichment |
+| Object            | Created by                                                                      | Contains                                                                                                                                                              | Consumed by                                                                                                            |
+| ----------------- | ------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
+| `RequestSnapshot` | `RequestSnapshotCapture()`                                                      | Immutable ingress transport data: method, path, route params, query params, headers, content type, captured body bytes, `BodyNotCaptured`, request id, trace metadata | `DeriveWhiteBoxPrompt`, audit logging, passthrough semantic enrichers, any later logic that needs raw ingress fidelity |
+| `WhiteBoxPrompt`  | `core.DeriveWhiteBoxPrompt(snapshot)`                                           | Best-effort semantics: route type, operation type, route hints, stream intent, JSON parsed flag, cached typed request objects, cached route metadata                  | workflow resolution, canonical request decoding, passthrough/file/batch helpers                                        |
+| `Workflow`        | `WorkflowResolutionWithResolver(...)` or `ensureTranslatedRequestWorkflow(...)` | Control-plane decision: endpoint descriptor, execution mode, capabilities, provider type, resolved model selector, passthrough info                                   | response cache, translated handlers, passthrough handlers, audit-log enrichment                                        |
 
 Important constraints:
 
 
@@ -8,8 +8,8 @@ GoModel is a high-performance AI gateway in Go that routes requests to multiple
 
 Follow Postel's Law: be conservative in what you send, liberal in what you accept.
 
-- GOModel accepts client requests generously (e.g. `max_tokens` for any model) and adapts them to each provider's specific requirements before forwarding (e.g. translating `max_tokens` → `max_completion_tokens` for OpenAI reasoning models).
-- GOModel accepts providers' responses liberally and passes them to the user in a conservative OpenAI-compatible shape.
+- GoModel accepts client requests generously (e.g. `max_tokens` for any model) and adapts them to each provider's specific requirements before forwarding (e.g. translating `max_tokens` → `max_completion_tokens` for OpenAI reasoning models).
+- GoModel accepts providers' responses liberally and passes them to the user in a conservative OpenAI-compatible shape.
 
 [The Twelve-Factor App](https://12factor.net/).
 
 
@@ -4,10 +4,10 @@ Guidance for AI models (like Claude) working with this codebase.
 
 ## Project Overview
 
-**GOModel** is a high-performance AI gateway in Go that routes requests to multiple AI model providers (OpenAI, Anthropic, Gemini, Groq, xAI, Oracle, Ollama). LiteLLM killer.
+**GoModel** is a high-performance AI gateway in Go that routes requests to multiple AI model providers (OpenAI, Anthropic, Gemini, Groq, xAI, Oracle, Ollama). LiteLLM killer.
 
 **Go:** 1.26.2
-**Repo:** https://github.com/ENTERPILOT/GOModel
+**Repo:** https://github.com/ENTERPILOT/GoModel
 
 - **Stage:** Development - backward compatibility is not a concern
 - **Design philosophy:**
@@ -36,7 +36,7 @@ make clean             # Remove bin/
 make record-api        # Record API responses for contract tests
 make swagger           # Regenerate Swagger docs
 make infra             # Docker Compose: Redis, Postgres, MongoDB, Adminer only
-make image             # Docker Compose: full stack (GOModel + Prometheus)
+make image             # Docker Compose: full stack (GoModel + Prometheus)
 ```
 
 **Single test:** `go test ./internal/providers -v -run TestName`
 
@@ -102,26 +102,24 @@ providers:
     api_key: ${ANTHROPIC_API_KEY}
     resilience:
       retry:
-        max_retries: 5          # Anthropic supports long requests — allow more retries
+        max_retries: 5 # Anthropic supports long requests — allow more retries
   ollama:
     type: ollama
     base_url: ${OLLAMA_BASE_URL:-http://localhost:11434/v1}
     resilience:
       circuit_breaker:
-        failure_threshold: 10   # local service — tolerate more transient failures
+        failure_threshold: 10 # local service — tolerate more transient failures
         timeout: 5s
 ```
 
 **Effective resilience per provider:**
 
-
 | Provider  | max_retries      | failure_threshold | cb timeout        |
 | --------- | ---------------- | ----------------- | ----------------- |
 | openai    | 2 (global)       | 3 (global)        | 15s (global)      |
 | anthropic | **5** (override) | 3 (global)        | 15s (global)      |
 | ollama    | 2 (global)       | **10** (override) | **5s** (override) |
 
-
 Only fields that are explicitly listed under a provider's `resilience:` block are overridden. Everything else silently inherits from the global section.
 
 ---
@@ -163,7 +161,6 @@ GROQ_API_KEY=gsk_...
 
 All resilience settings can be overridden at runtime via env vars. Env vars always beat both code defaults and YAML values.
 
-
 | Variable                            | Type     | Default   | Description                                                                                                                                                                                                                                                 |
 | ----------------------------------- | -------- | --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `RETRY_MAX_RETRIES`                 | int      | `3`       | Maximum retry attempts per request                                                                                                                                                                                                                          |
@@ -174,36 +171,33 @@ All resilience settings can be overridden at runtime via env vars. Env vars alwa
 | `CIRCUIT_BREAKER_FAILURE_THRESHOLD` | int      | `5`       | Consecutive failures before opening                                                                                                                                                                                                                         |
 | `CIRCUIT_BREAKER_SUCCESS_THRESHOLD` | int      | `2`       | Consecutive successes to close again                                                                                                                                                                                                                        |
 | `CIRCUIT_BREAKER_TIMEOUT`           | duration | `30s`     | How long the circuit stays open                                                                                                                                                                                                                             |
-| `LOG_FORMAT`                        | string   | *(unset)* | Auto-detects based on environment: colorized text on a TTY, JSON otherwise. Set to `text` to force human-readable output (no colors if not a TTY), or `json` to force structured JSON even on a TTY (recommended for production, CloudWatch, Datadog, GCP). |
-| `LOG_LEVEL`                         | string   | `info`    | Minimum runtime log level. Supported values are `debug`, `info`, `warn`, and `error`. Common aliases such as `dbg`, `inf`, `warning`, and `err` are also accepted.                                                                                      |
-
+| `LOG_FORMAT`                        | string   | _(unset)_ | Auto-detects based on environment: colorized text on a TTY, JSON otherwise. Set to `text` to force human-readable output (no colors if not a TTY), or `json` to force structured JSON even on a TTY (recommended for production, CloudWatch, Datadog, GCP). |
+| `LOG_LEVEL`                         | string   | `info`    | Minimum runtime log level. Supported values are `debug`, `info`, `warn`, and `error`. Common aliases such as `dbg`, `inf`, `warning`, and `err` are also accepted.                                                                                          |
 
 Provider credentials:
 
-
-| Variable             | Provider                                      |
-| -------------------- | --------------------------------------------- |
-| `OPENAI_API_KEY`     | OpenAI                                        |
-| `OPENAI_BASE_URL`    | OpenAI (custom endpoint)                      |
-| `ANTHROPIC_API_KEY`  | Anthropic                                     |
-| `ANTHROPIC_BASE_URL` | Anthropic (custom endpoint)                   |
-| `GEMINI_API_KEY`     | Google Gemini                                 |
-| `GEMINI_BASE_URL`    | Gemini (custom endpoint)                      |
-| `OPENROUTER_API_KEY` | OpenRouter (default base URL: `https://openrouter.ai/api/v1`) |
-| `OPENROUTER_BASE_URL` | OpenRouter (custom endpoint override)        |
+| Variable              | Provider                                                                       |
+| --------------------- | ------------------------------------------------------------------------------ |
+| `OPENAI_API_KEY`      | OpenAI                                                                         |
+| `OPENAI_BASE_URL`     | OpenAI (custom endpoint)                                                       |
+| `ANTHROPIC_API_KEY`   | Anthropic                                                                      |
+| `ANTHROPIC_BASE_URL`  | Anthropic (custom endpoint)                                                    |
+| `GEMINI_API_KEY`      | Google Gemini                                                                  |
+| `GEMINI_BASE_URL`     | Gemini (custom endpoint)                                                       |
+| `OPENROUTER_API_KEY`  | OpenRouter (default base URL: `https://openrouter.ai/api/v1`)                  |
+| `OPENROUTER_BASE_URL` | OpenRouter (custom endpoint override)                                          |
 | `OPENROUTER_SITE_URL` | OpenRouter attribution URL override (default: `https://gomodel.enterpilot.io`) |
-| `OPENROUTER_APP_NAME` | OpenRouter attribution title override (default: `GOModel`) |
-| `XAI_API_KEY`        | xAI / Grok                                    |
-| `XAI_BASE_URL`       | xAI (custom endpoint)                         |
-| `GROQ_API_KEY`       | Groq                                          |
-| `GROQ_BASE_URL`      | Groq (custom endpoint)                        |
-| `AZURE_API_KEY`      | Azure OpenAI                                  |
-| `AZURE_BASE_URL`     | Azure OpenAI deployment base URL              |
-| `AZURE_API_VERSION`  | Azure OpenAI API version override (default: `2024-10-21`) |
-| `ORACLE_API_KEY`     | Oracle                                        |
-| `ORACLE_BASE_URL`    | Oracle OpenAI-compatible base URL             |
-| `OLLAMA_BASE_URL`    | Ollama (default: `http://localhost:11434/v1`) |
-
+| `OPENROUTER_APP_NAME` | OpenRouter attribution title override (default: `GoModel`)                     |
+| `XAI_API_KEY`         | xAI / Grok                                                                     |
+| `XAI_BASE_URL`        | xAI (custom endpoint)                                                          |
+| `GROQ_API_KEY`        | Groq                                                                           |
+| `GROQ_BASE_URL`       | Groq (custom endpoint)                                                         |
+| `AZURE_API_KEY`       | Azure OpenAI                                                                   |
+| `AZURE_BASE_URL`      | Azure OpenAI deployment base URL                                               |
+| `AZURE_API_VERSION`   | Azure OpenAI API version override (default: `2024-10-21`)                      |
+| `ORACLE_API_KEY`      | Oracle                                                                         |
+| `ORACLE_BASE_URL`     | Oracle OpenAI-compatible base URL                                              |
+| `OLLAMA_BASE_URL`     | Ollama (default: `http://localhost:11434/v1`)                                  |
 
 See `.env.template` for the full list of all configurable environment variables.
 
@@ -233,7 +227,7 @@ If your Oracle endpoint does not return a usable model list, configure `provider
 **Azure ships with a pinned API version by default.**
 If you do not set `AZURE_API_VERSION`, the gateway sends `api-version=2024-10-21`. Override it only when you need a different Azure API version.
 
-**OpenRouter gets GOModel attribution headers by default.**
+**OpenRouter gets GoModel attribution headers by default.**
 When the `openrouter` provider is used, the gateway adds `HTTP-Referer` and `X-OpenRouter-Title` unless the request already provides them. Override the defaults with `OPENROUTER_SITE_URL` and `OPENROUTER_APP_NAME`.
 
 **Partial YAML fields leave the rest at defaults.**
 
@@ -1,12 +1,12 @@
 # Prometheus Metrics Configuration Guide
 
-This guide explains how to configure Prometheus metrics in GOModel.
+This guide explains how to configure Prometheus metrics in GoModel.
 
 ## Quick Start
 
 ### Disabled by Default
 
-Metrics are **disabled by default**. To enable metrics collection, set `METRICS_ENABLED=true` and start GOModel:
+Metrics are **disabled by default**. To enable metrics collection, set `METRICS_ENABLED=true` and start GoModel:
 
 ```bash
 export METRICS_ENABLED=true
@@ -174,7 +174,6 @@ If you need to protect the metrics endpoint further:
    ```
 
 2. **Use network-level security:**
-
    - Configure firewall rules to allow only the Prometheus server
    - Use a private network for metrics collection
    - Deploy Prometheus in the same VPC/network
 
@@ -1,11 +1,12 @@
-.PHONY: all build run clean tidy test test-race test-dashboard test-e2e test-integration test-contract test-all lint lint-fix record-api swagger install-tools perf-check perf-bench infra image
+.PHONY: all build run clean tidy test test-race test-dashboard test-e2e test-integration test-contract test-all lint lint-fix record-api swagger docs-openapi install-tools perf-check perf-bench infra image
 
 all: build
 
 # Get version info
 VERSION ?= $(shell git describe --tags --always --dirty)
 COMMIT ?= $(shell git rev-parse --short HEAD)
 DATE ?= $(shell date -u +"%Y-%m-%dT%H:%M:%SZ")
+DOCS_API_SERVERS ?= http://localhost:8080
 
 # Linker flags to inject version info
 LDFLAGS := -X "gomodel/internal/version.Version=$(VERSION)" \
@@ -35,7 +36,7 @@ tidy:
 infra:
 	docker compose up -d
 
-# Docker Compose: full stack (GOModel + Prometheus; builds app image when needed)
+# Docker Compose: full stack (GoModel + Prometheus; builds app image when needed)
 image:
 	docker compose --profile app up -d
 
@@ -87,8 +88,20 @@ swagger:
 	go run github.com/swaggo/swag/cmd/swag init --generalInfo main.go \
 		--dir cmd/gomodel,internal \
 		--output cmd/gomodel/docs \
-		--outputTypes go,json \
+		--outputTypes go \
 		--parseDependency
+	$(MAKE) docs-openapi
+
+# Regenerate docs/openapi.json: run swag to emit swagger.json into a temp dir,
+# convert it with swagger2openapi, then overwrite the spec's `servers` list from
+# DOCS_API_SERVERS (comma-separated URLs; at least one is required).
+# `set -e` aborts on the first failing step so a broken swag/npx run cannot be
+# masked by a later step succeeding against a stale docs/openapi.json; the EXIT
+# trap still removes the temp dir on both success and failure.
+docs-openapi:
+	@set -e; tmp_dir=$$(mktemp -d); \
+	trap 'rm -rf "$$tmp_dir"' EXIT; \
+	go run github.com/swaggo/swag/cmd/swag init --quiet --generalInfo main.go \
+		--dir cmd/gomodel,internal \
+		--output "$$tmp_dir" \
+		--outputTypes json \
+		--parseDependency; \
+	npx -y swagger2openapi@7.0.8 --patch -o docs/openapi.json "$$tmp_dir/swagger.json"; \
+	DOCS_API_SERVERS="$(DOCS_API_SERVERS)" node -e 'const fs = require("fs"); const file = "docs/openapi.json"; const urls = (process.env.DOCS_API_SERVERS || "").split(",").map((url) => url.trim()).filter(Boolean); if (!urls.length) throw new Error("DOCS_API_SERVERS must include at least one URL"); const spec = JSON.parse(fs.readFileSync(file, "utf8")); spec.servers = urls.map((url) => ({ url, description: /(^https?:\/\/)?(localhost|127\.0\.0\.1)(:|\/|$$)/.test(url) ? "Local GoModel" : "GoModel" })); fs.writeFileSync(file, JSON.stringify(spec, null, 2) + "\n");'
 
 # Run linter
 lint: