diff --git a/modules/ROOT/nav.adoc b/modules/ROOT/nav.adoc index 6426d74..db1ff0a 100644 --- a/modules/ROOT/nav.adoc +++ b/modules/ROOT/nav.adoc @@ -1,37 +1,37 @@ * xref:ROOT:index.adoc[Overview] -* xref:get-started:index.adoc[Get started] +* xref:get-started:index.adoc[Get Started] ** xref:get-started:byoc-prereqs.adoc[Prerequisites] ** xref:get-started:byoc-quickstart.adoc[Quickstart] -** xref:get-started:first-agent.adoc[Build your first agent] -** xref:get-started:rpk-install.adoc[Install or update rpk] -** xref:get-started:invite-team.adoc[Invite your team] +** xref:get-started:first-agent.adoc[Build Your First Agent] +** xref:get-started:rpk-install.adoc[Install or Update rpk] +** xref:get-started:invite-team.adoc[Invite Your Team] * xref:agents:index.adoc[Agents] ** xref:agents:overview.adoc[Overview] ** xref:agents:quickstart.adoc[Quickstart] ** xref:agents:concepts.adoc[Concepts] -** xref:agents:a2a-concepts.adoc[Agent-to-agent concepts] -** xref:agents:create-agent.adoc[Create a declarative agent] -** xref:agents:byoa-register.adoc[Register your own agent (BYOA)] -** xref:agents:system-prompts.adoc[System prompt best practices] -** xref:agents:architecture-patterns.adoc[Architecture patterns] -** xref:agents:integration-overview.adoc[Integration overview] -** xref:agents:pipeline-integration-patterns.adoc[Pipeline integration patterns] -** xref:agents:monitor.adoc[Monitor agents] +** xref:agents:a2a-concepts.adoc[Agent-to-Agent Concepts] +** xref:agents:create-agent.adoc[Create a Declarative Agent] +** xref:agents:byoa-register.adoc[Register Your Own Agent (BYOA)] +** xref:agents:system-prompts.adoc[System Prompt Best Practices] +** xref:agents:architecture-patterns.adoc[Architecture Patterns] +** xref:agents:integration-overview.adoc[Integration Overview] +** xref:agents:pipeline-integration-patterns.adoc[Pipeline Integration Patterns] +** xref:agents:monitor.adoc[Monitor Agents] ** xref:agents:troubleshoot/index.adoc[Troubleshoot] -*** 
xref:agents:troubleshoot/troubleshoot-ai-agents.adoc[Troubleshoot AI agents] +*** xref:agents:troubleshoot/troubleshoot-ai-agents.adoc[Troubleshoot AI Agents] *** xref:agents:tutorials/index.adoc[Tutorials] -**** xref:agents:tutorials/customer-support-agent.adoc[Customer support agent] -**** xref:agents:tutorials/transaction-dispute-resolution.adoc[Transaction dispute resolution] +**** xref:agents:tutorials/customer-support-agent.adoc[Customer Support Agent] +**** xref:agents:tutorials/transaction-dispute-resolution.adoc[Transaction Dispute Resolution] * xref:mcp:index.adoc[MCP Servers] ** xref:mcp:overview.adoc[Overview] -** xref:mcp:create-server.adoc[Create a server] -** xref:mcp:register-remote.adoc[Register a self-managed server] +** xref:mcp:create-server.adoc[Create a Server] +** xref:mcp:register-remote.adoc[Register a Self-Managed Server] ** xref:mcp:oauth-providers.adoc[Configure an OAuth Provider] -** xref:mcp:user-delegated-oauth.adoc[User-delegated OAuth] -*** xref:mcp:github-oauth-tutorial.adoc[Tutorial: Set up GitHub OAuth] -** xref:mcp:test-tools.adoc[Test a server's tools] -** xref:mcp:managed/index.adoc[Managed catalog] -*** xref:mcp:managed/managed-catalog.adoc[Catalog reference] +** xref:mcp:user-delegated-oauth.adoc[User-Delegated OAuth] +*** xref:mcp:github-oauth-tutorial.adoc[Tutorial: Set Up GitHub OAuth] +** xref:mcp:test-tools.adoc[Test a Server's Tools] +** xref:mcp:managed/index.adoc[Managed Catalog] +*** xref:mcp:managed/managed-catalog.adoc[Catalog Reference] *** xref:mcp:managed/bamboohr.adoc[BambooHR] *** xref:mcp:managed/sql.adoc[SQL] *** xref:mcp:managed/kafka.adoc[Kafka] @@ -48,30 +48,30 @@ ** xref:ai-gateway:overview.adoc[Overview] ** xref:ai-gateway:gateway-quickstart.adoc[Quickstart] ** xref:ai-gateway:gateway-architecture.adoc[Architecture] -** xref:ai-gateway:configure-provider.adoc[Configure your LLM provider] -** xref:ai-gateway:routing-cel.adoc[CEL routing] -** xref:ai-gateway:aggregation.adoc[MCP aggregation] -** 
xref:ai-gateway:connect-agent.adoc[Connect your agent] +** xref:ai-gateway:configure-provider.adoc[Configure Your LLM Provider] +** xref:ai-gateway:routing-cel.adoc[CEL Routing] +** xref:ai-gateway:aggregation.adoc[MCP Aggregation] +** xref:ai-gateway:connect-agent.adoc[Connect Your Agent] *** xref:ai-gateway:admin/index.adoc[For Admins] -**** xref:ai-gateway:admin/setup-guide.adoc[Setup guide] +**** xref:ai-gateway:admin/setup-guide.adoc[Setup Guide] *** xref:ai-gateway:builders/index.adoc[For Builders] -**** xref:ai-gateway:builders/discover-gateways.adoc[Discover gateways] +**** xref:ai-gateway:builders/discover-gateways.adoc[Discover Gateways] * xref:governance:index.adoc[Governance] -** xref:governance:dashboard/index.adoc[Governance dashboard] -*** xref:governance:dashboard/overview.adoc[Read the governance overview] +** xref:governance:dashboard/index.adoc[Governance Dashboard] +*** xref:governance:dashboard/overview.adoc[Read the Governance Overview] ** xref:governance:guardrails/index.adoc[Guardrails] *** xref:governance:guardrails/overview.adoc[Overview] -*** xref:governance:guardrails/create-guardrail.adoc[Create a guardrail] -*** xref:governance:guardrails/types-reference.adoc[Evaluator types] -*** xref:governance:guardrails/violations.adoc[Read violations] -*** xref:governance:guardrails/cost-tracking.adoc[Cost tracking] -** xref:governance:budgets.adoc[Token budgets and limits] -** xref:governance:kill-switch.adoc[Kill switch] +*** xref:governance:guardrails/create-guardrail.adoc[Create a Guardrail] +*** xref:governance:guardrails/types-reference.adoc[Evaluator Types] +*** xref:governance:guardrails/violations.adoc[Read Violations] +*** xref:governance:guardrails/cost-tracking.adoc[Cost Tracking] +** xref:governance:budgets.adoc[Token Budgets and Limits] +** xref:governance:kill-switch.adoc[Kill Switch] * xref:observability:index.adoc[Observability] ** xref:observability:concepts.adoc[Concepts] -** xref:observability:transcripts.adoc[Read a 
transcript] -** xref:observability:byoa-telemetry.adoc[BYOA telemetry (OpenTelemetry)] -** xref:observability:ingest-custom-traces.adoc[Ingest custom traces] +** xref:observability:transcripts.adoc[Read a Transcript] +** xref:observability:byoa-telemetry.adoc[BYOA Telemetry (OpenTelemetry)] +** xref:observability:ingest-custom-traces.adoc[Ingest Custom Traces] ** xref:observability:metrics.adoc[Metrics] ** xref:observability:logs.adoc[Logs] * xref:integrations:index.adoc[Integrations] @@ -80,9 +80,9 @@ ** xref:integrations:continue.adoc[Continue] ** xref:integrations:cline.adoc[Cline] ** xref:integrations:copilot.adoc[GitHub Copilot] -** xref:integrations:remote-mcp-clients.adoc[Remote MCP clients (Claude Desktop, ChatGPT, Gemini)] +** xref:integrations:remote-mcp-clients.adoc[Remote MCP Clients (Claude Desktop, ChatGPT, Gemini)] * xref:reference:index.adoc[Reference] -** xref:reference:api.adoc[API reference] +** xref:reference:api.adoc[API Reference] ** xref:reference:rpk/index.adoc[rpk Commands] *** xref:reference:rpk/rpk-ai/rpk-ai.adoc[rpk ai] **** xref:reference:rpk/rpk-ai/rpk-ai-install.adoc[] diff --git a/modules/ai-gateway/pages/configure-provider.adoc b/modules/ai-gateway/pages/configure-provider.adoc index c8acce6..225a87b 100644 --- a/modules/ai-gateway/pages/configure-provider.adoc +++ b/modules/ai-gateway/pages/configure-provider.adoc @@ -40,7 +40,7 @@ The first card on the page collects identity fields. |*Name* |Yes -|Machine identifier. Lowercase letters, numbers, and hyphens only (`^[a-z][a-z0-9-]*$`), up to 63 characters. Immutable after creation. Appears in the proxy URL (`/llm/v1/providers//...`). The form auto-suggests a friendly name (for example, `red-space-bear`); override it if you want something more descriptive. +|Machine identifier. Lowercase letters, numbers, and hyphens only (`^[a-z][a-z0-9-]*$`), up to 63 characters. Immutable after creation. Appears in the proxy URL (`/llm/v1/providers//...`). 
The form auto-suggests a friendly name (for example, `red-space-bear`). Override it if you want something more descriptive. |*Display name* (Advanced options) |No @@ -70,7 +70,7 @@ The *Provider type* card shows five cards. Pick the one that matches your upstre |Invoke foundation models (Claude, Llama, Titan, Nova, Mistral, AI21 Jamba) hosted inside your AWS account. Requires an AWS region and credentials (static, STS-assumed role, or the default credential chain). Supports the native Bedrock APIs (`InvokeModel`, `Converse`) and an OpenAI-compatible Chat Completions endpoint for `gpt-oss` models. See <<bedrock-inference-profiles>> for picking the right model identifier. |*OpenAI-compatible* -|Point at any OpenAI-compatible endpoint that ships `/v1/chat/completions` (vLLM, Ollama, LM Studio, LocalAI, Together, Groq, OpenRouter). Useful for self-hosted models and aggregator gateways. Requires a *Base URL*; authentication is optional. +|Point at any OpenAI-compatible endpoint that ships `/v1/chat/completions` (vLLM, Ollama, LM Studio, LocalAI, Together, Groq, OpenRouter). Useful for self-hosted models and aggregator gateways. Requires a *Base URL*. Authentication is optional. |=== Selecting a type reveals the type-specific configuration block below the picker. @@ -178,11 +178,11 @@ Redpanda stores models as structured `ProviderModel` entries (one entry per mode After you create the provider, the detail page renders each model as a row with capability badges (*Vision*, *Reasoning*, *Streaming*, and others lifted from the catalog), the model's 7-day spend, and a link to the per-model detail page. The model list supports search and filtering. -The detail page also carries a *Last 7 days* KPI strip (*TOTAL SPEND*, *REQUESTS*, *TOKENS*) with sparklines and _vs previous period_ deltas. *View all* on each card opens the relevant Governance drill-down (Spending, Requests, or Tokens) with this provider pre-filtered. 
See xref:governance:dashboard/overview.adoc[the governance overview] for the drill-down details. +The detail page also carries a *Last 7 days* KPI strip (*TOTAL SPEND*, *REQUESTS*, *TOKENS*) with sparklines and _vs previous period_ deltas. *View all* on each card opens the *Cost & Usage* tab with this provider pre-filtered so you can drill into spend, request, or token trends. == Save and verify -. Click *Create provider*. The button activates once *Name* and *Type* are both set; the right-hand *Summary* panel checks them off as you fill them in. +. Click *Create provider*. The button activates after *Name* and *Type* are both set. The *Summary* panel checks them off as you fill them in. . On the provider's detail page, the *Connection* card shows your *Proxy URL*, *Discovery* URL, *Base URL*, and *API key ref*. Copy the *Proxy URL*: this is where your applications point. . Scroll to the *Verify connection* section. Pick a model from the dropdown and click *Test Connection*. The status updates from _Not tested yet_ to a pass/fail indicator. Use the *Show commands* disclosure if you want to see the equivalent curl or SDK call. . To wire up an application, open *Connect your app* further down the page or follow xref:ai-gateway:connect-agent.adoc[Connect your agent]. @@ -192,7 +192,7 @@ A successful Test Connection result confirms that the provider's credentials, re [[bedrock-inference-profiles]] == AWS Bedrock: Inference profiles and IAM -Bedrock has three concepts that affect how you configure a provider: foundation models, cross-region inference profiles, and IAM. Get these right and the *Test connection* check passes; get them wrong and you see `AccessDenied` or `ValidationException` errors. +Bedrock has three concepts that affect how you configure a provider: foundation models, cross-region inference profiles, and IAM. Get these right and the *Test connection* check passes. Get them wrong and you see `AccessDenied` or `ValidationException` errors. 
=== Foundation models versus inference profiles @@ -234,7 +234,7 @@ Anthropic Claude 4.6+ models (Sonnet 4.6, Opus 4.6, Opus 4.7) cannot be invoked Older 4.5 and earlier Claude models still accept bare IDs. ==== -Pricing varies by profile. The bare foundation-model ID and the `global.` profile share AWS's headline rate; geo profiles (`us.`, `eu.`, `apac.`, `au.`, `jp.`) carry approximately a 10% cross-region inference premium. Use `global.` when you want the headline rate and don't need a specific geography; use `us.` / `eu.` / `apac.` when data residency matters. +Pricing varies by profile. The bare foundation-model ID and the `global.` profile share AWS's headline rate. Geo profiles (`us.`, `eu.`, `apac.`, `au.`, `jp.`) carry approximately a 10% cross-region inference premium. Use `global.` when you want the headline rate and don't need a specific geography. Use `us.` / `eu.` / `apac.` when data residency matters. === IAM policy patterns @@ -302,7 +302,7 @@ The *LLM Providers* list page is the at-a-glance home for every provider in your |First two model identifiers exposed by the provider, plus a `+N` overflow chip when more are configured. |*Spend (7d)* -|Spend over the last 7 days with a small sparkline and a "vs previous period" delta. The window is fixed at 7 days on this view; longer-range analysis runs through the xref:governance:dashboard/overview.adoc[governance dashboard]. +|Spend over the last 7 days with a small sparkline and a "vs previous period" delta. The window is fixed at 7 days on this view. Longer-range analysis runs through the xref:governance:dashboard/overview.adoc[governance dashboard]. |*Updated* |Relative timestamp of the last edit. @@ -310,11 +310,30 @@ The *LLM Providers* list page is the at-a-glance home for every provider in your A list/grid view toggle in the top-right switches between table and card layouts. The *Filter* button narrows the list by provider type, status, or name. 
The *Create provider* button opens the create flow described in <>. +[[view-cost-and-usage]] +== View cost and usage + +The *Cost & Usage* tab on the *LLM Providers* page tracks spend, request volume, and token volume over time across providers and models. Use it when you want to understand which provider or model generated usage during a selected time window. + +The page includes these charts: + +* *Spend over time*: Estimated spend in USD for the selected range. +* *Requests over time*: Request count for the selected range. +* *Tokens over time*: Token count for the selected range. + +Use *Group by* to switch the chart breakdown among providers, models, and token types. Group by provider to see which upstream consumed the most budget. Group by model to see which model drove spend inside one or more providers. Group by token type to separate input, output, cached, cache-write, and reasoning usage where those buckets apply. + +Use *Filter* to narrow the charts by provider, model, cost type, token type, or user. Each filter appears as a chip above the chart, and you can combine them. For example, filter to one Anthropic provider, drill into `claude-opus-4-7`, then limit the spend view to input tokens. + +The date-range picker supports last 7 days, last 14 days, last 30 days, last 90 days, month to date, quarter to date, year to date, and custom ranges. The chart subtitle shows the selected date range and bucket size. + +The spend chart footer summarizes the selected view by cost bucket, including total, input, output, cached, cache writes, and reasoning when the selected traffic includes those categories. + == Edit, disable, or delete a provider * *Edit*: Click *Edit* on the detail page. You can change any field *except* `Name` and `Type`, which are immutable. Model lists, credential references, and the enabled state can all change. * *Disable*: Click *Disable* on the detail page. 
The provider remains in the list, but requests to its proxy URL are rejected until you enable it again. Use this when you want to pause traffic without losing configuration. -* *Delete*: Scroll to the *Delete this provider* section at the bottom of the detail page and click *Delete*. The action is permanent; in-flight requests fail and downstream clients receive errors until reconfigured. +* *Delete*: Scroll to the *Delete this provider* section at the bottom of the detail page and click *Delete*. The action is permanent. In-flight requests fail and downstream clients receive errors until reconfigured. [[troubleshooting]] == Troubleshooting @@ -351,7 +370,7 @@ A list/grid view toggle in the top-right switches between table and card layouts AI Gateway does not provide these capabilities. For current status, consult the ADP release notes. * *Multi-provider routing, failover, and retries across providers.* A synthetic provider that fans requests to multiple upstreams is not part of AI Gateway. -* *Spend limits.* Per-user, per-org, and global cost caps are not available. The provider detail page shows a *Cost & usage* placeholder labeled "Coming soon"; see xref:governance:budgets.adoc[Token budgets and limits]. +* *Spend limits.* Per-user, per-org, and global cost caps are not available. AI Gateway records spend and token usage for reporting in the provider *Cost & Usage* view and Governance, but it does not enforce budget caps. See xref:governance:budgets.adoc[Token budgets and limits]. * *Rate limits.* Requests-per-second, per-minute, or per-day limits are not available. * *Managed MCP aggregation at the gateway.* Register MCP tool servers separately under *MCP Servers* in the ADP UI. 
diff --git a/modules/ai-gateway/pages/overview.adoc b/modules/ai-gateway/pages/overview.adoc index 7a8a6b3..2b4c6fe 100644 --- a/modules/ai-gateway/pages/overview.adoc +++ b/modules/ai-gateway/pages/overview.adoc @@ -30,7 +30,7 @@ AI Gateway consolidates provider access behind the following capabilities. === Traffic stays in your VPC -LLM requests are proxied through your dataplane's AI Gateway. API keys are stored in your dataplane's secret store and never leave your infrastructure. Upstream calls leave your VPC only when the LLM provider is third-party (OpenAI, Anthropic, Google AI); self-hosted OpenAI-compatible endpoints stay entirely inside your network. +LLM requests are proxied through your dataplane's AI Gateway. API keys are stored in your dataplane's secret store and never leave your infrastructure. Upstream calls leave your VPC only when the LLM provider is third-party (OpenAI, Anthropic, Google AI). Self-hosted OpenAI-compatible endpoints stay entirely inside your network. === Centralized secrets @@ -38,11 +38,11 @@ The upstream API key (or AWS credentials for Bedrock) lives in the Redpanda secr === A managed proxy URL per provider -Every provider you create has its own URL of the form `/llm/v1/providers//`. Your application points its SDK at this URL instead of the upstream, continues to use the provider's native API, and authenticates to Redpanda with a short-lived OIDC access token. The gateway base is a cluster-specific subdomain (for example, `aigw..clusters.rdpa.co`); copy the exact value from the *Proxy URL* field on any provider's detail page. +Every provider you create has its own URL of the form `/llm/v1/providers//`. Your application points its SDK at this URL instead of the upstream, continues to use the provider's native API, and authenticates to Redpanda with a short-lived OIDC access token. The gateway base is a cluster-specific subdomain (for example, `aigw..clusters.rdpa.co`). 
Copy the exact value from the *Proxy URL* field on any provider's detail page. === Native SDK compatibility -Use the provider's own SDK: OpenAI, Anthropic, Google AI, AWS Bedrock, or any OpenAI-compatible client (vLLM, Ollama, LM Studio, LocalAI, Together, Groq, OpenRouter). AI Gateway does not require a single unified SDK; it forwards native requests to the native upstream. +Use the provider's own SDK: OpenAI, Anthropic, Google AI, AWS Bedrock, or any OpenAI-compatible client (vLLM, Ollama, LM Studio, LocalAI, Together, Groq, OpenRouter). AI Gateway does not require a single unified SDK. It forwards native requests to the native upstream. === Managed authentication @@ -50,13 +50,13 @@ Applications authenticate to ADP with OIDC service accounts instead of long-live === Per-provider observability -The provider's detail page in the ADP UI records request and token counts. +The provider's detail page in the ADP UI records spend, request counts, and token counts for the last 7 days. The *Cost & Usage* tab on the *LLM Providers* page expands that view with time-series charts, provider and model grouping, date ranges, and filters for provider, model, cost type, token type, and user. == What's in the UI In the ADP UI you'll find four areas: -* *LLM Providers*: Create, edit, enable, and delete providers. This is the home of AI Gateway configuration. +* *LLM Providers*: Create, edit, enable, and delete providers, and review provider-level cost and usage. This is the home of AI Gateway configuration. * *MCP Servers*: Register glossterm:MCP[] tool servers for agents. Separate from the AI Gateway proxy URL. * *OAuth Providers*: Register OAuth providers for user-delegated flows (for example, GitHub or Google). * *My Connections*: Per-user OAuth token management. @@ -110,7 +110,7 @@ It is not the right fit when you: AI Gateway does not provide these capabilities. For current status, consult the ADP release notes. 
* *Multi-provider routing, failover, and retries.* A synthetic provider that fans requests to multiple upstreams is not part of AI Gateway. -* *Spend limits.* Per-user, per-org, and global cost caps are not available. The provider detail page shows a *Cost & usage* placeholder labeled "Coming soon"; see xref:governance:budgets.adoc[Token budgets and limits] for the read-only spending visibility. +* *Spend limits.* Per-user, per-org, and global cost caps are not available. AI Gateway records spend and token usage for reporting in the provider *Cost & Usage* view and Governance, but it does not enforce budget caps. See xref:governance:budgets.adoc[Token budgets and limits] for the read-only spending visibility. * *Rate limits.* Requests-per-second, per-minute, or per-day caps are not available. * *Managed MCP aggregation at the gateway.* Register MCP tool servers separately under *MCP Servers* in the ADP UI. diff --git a/modules/get-started/pages/byoc-prereqs.adoc b/modules/get-started/pages/byoc-prereqs.adoc index 2acc519..6c7aa0f 100644 --- a/modules/get-started/pages/byoc-prereqs.adoc +++ b/modules/get-started/pages/byoc-prereqs.adoc @@ -1,4 +1,4 @@ -= ADP BYOC prerequisites += ADP BYOC Prerequisites :description: Requirements for running Redpanda ADP on Redpanda Cloud BYOC environments on AWS. :page-topic-type: reference :personas: platform_admin @@ -9,7 +9,7 @@ // Source: `cloudv2` `terraform/provisioners/aws-redpanda-applications/*.tf`, `adp/manifests/CLAUDE.md`, and BYOC support partials, verified 2026-05-10. // TODO: Confirm the Redpanda Cloud UI label and status indicator for ADP enablement after an authenticated Cloud walkthrough. -ADP on BYOC uses Redpanda Cloud BYOC infrastructure on AWS. Use this reference to confirm the requirements before you create ADP resources on a BYOC environment. +ADP on BYOC uses Redpanda Bring Your Own Cloud (BYOC) infrastructure on AWS. 
Use this reference to confirm the requirements before you create ADP resources on a BYOC environment. Use this reference to: diff --git a/modules/get-started/pages/byoc-quickstart.adoc b/modules/get-started/pages/byoc-quickstart.adoc index e24a582..30432a5 100644 --- a/modules/get-started/pages/byoc-quickstart.adoc +++ b/modules/get-started/pages/byoc-quickstart.adoc @@ -1,4 +1,4 @@ -= ADP BYOC quickstart += ADP BYOC Quickstart :description: Start with ADP on a Redpanda Cloud BYOC environment on AWS. :page-topic-type: how-to :personas: platform_admin diff --git a/modules/get-started/pages/index.adoc b/modules/get-started/pages/index.adoc index d072f8a..e6d612f 100644 --- a/modules/get-started/pages/index.adoc +++ b/modules/get-started/pages/index.adoc @@ -1,3 +1,3 @@ -= Get started += Get Started :description: Set up your ADP environment, build your first agent, and invite your team. :page-layout: index diff --git a/modules/governance/pages/budgets.adoc b/modules/governance/pages/budgets.adoc index fdd212c..6047849 100644 --- a/modules/governance/pages/budgets.adoc +++ b/modules/governance/pages/budgets.adoc @@ -35,14 +35,24 @@ Cost is reported in *microcents*. 1 cent = 100 microcents, $1 = 10,000 microcent // TODO: confirm whether spending events are captured by default for every deployment, or whether some deployments require an opt-in flag. Open Q A1 in the companion plan. +=== Per-request pricing variations + +A few request- or response-time signals change the rate ADP applies to a single call. You don't configure these. The spending pipeline picks them up from the upstream response or request and bills accordingly. + +* *Anthropic fast mode*: Anthropic exposes a fast-mode option on some models (for example, Opus 4.6 fast) that carries a per-token premium over the default rate. ADP reads the `speed` field on each Anthropic response and bills fast-mode calls at the model's fast-mode rate. Requests without a `speed` field fall back to the default rate card. 
+* *Context-tier pricing*: A few models charge a different rate once a request crosses a context-length threshold. Gemini Pro, for example, prices requests above a 128K-token context at a higher tier than shorter requests. ADP passes the call's context-token count to the rate card so requests that cross the threshold bill at the tiered rate automatically. + == Where to view your spend -You don't view spend on this page. The dashboard, transcripts, and breakdown queries are the read surfaces: +You don't view spend on this page. The *LLM Providers* page, dashboard, transcripts, and breakdown queries are the read surfaces: [cols="1,3"] |=== |Surface |Use it for +|*Cost & Usage* tab (*LLM Providers* page) +|Time-series spend, request, and token charts across providers and models. Use it to group by provider, model, or token type, then filter by provider, model, cost type, token type, or user. See xref:ai-gateway:configure-provider.adoc#view-cost-and-usage[View cost and usage]. + |*Governance dashboard* |Summary cards (total spend, agent count, request count, trend), provider breakdown chart, events timeline, agents and MCP servers tables. The single-pane-of-glass view across your whole deployment. See xref:governance:dashboard/overview.adoc[Read the governance overview]. @@ -59,7 +69,7 @@ For more expressive queries, `SpendingFilter` also accepts an AIP-160 `filter` e [NOTE] ==== -`total_tokens` is derived server-side from input + output + cache tokens; you don't need to compute it client-side, and the value is consistent across all four `SpendingService` methods. +`total_tokens` is derived server-side from input + output + cache tokens. You don't need to compute it client-side, and the value is consistent across all four `SpendingService` methods. ==== // TODO: confirm `user_id` and `organization_id` are populated automatically from request context (OIDC claims) or require setup. Open Q A2 in the companion plan. 
@@ -135,9 +145,9 @@ The catalog default for the bucket appears alongside your override; clear the ov Three rules govern how the cost-reporting pipeline applies overrides: -* Overrides apply to *new spending events* from the moment they're saved. Historical events keep the rate card that was in effect when the event was recorded; the dashboard's _Compare to previous period_ overlay reflects that. +* Overrides apply to *new spending events* from the moment they're saved. Historical events keep the rate card that was in effect when the event was recorded. The dashboard's _Compare to previous period_ overlay reflects that. * Overrides are scoped to one model at a time. To roll out a new contract across an entire provider's catalog, edit each model in turn. -* Cache-read pricing follows the catalog default; only cache-write tiers are overridable. Cache-read tokens still surface in the *CACHED* bucket on the dashboard and in `SpendingService` responses. +* Cache-read pricing follows the catalog default. Only cache-write tiers are overridable. Cache-read tokens still surface in the *CACHED* bucket on the dashboard and in `SpendingService` responses. // TODO: confirm whether overrides surface in `SpendingService.GetSpendingBreakdown` responses (for example, as a flag indicating which line items used overrides) or are applied invisibly. Walk against `adp-production` once the override editor is live there.