From 4f147d06208f205a5f4cc78253447a6c0b0377a5 Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Wed, 1 Apr 2026 19:46:26 -0400 Subject: [PATCH 1/5] native inference billing --- apis/cloudflare/src/billing.ts | 82 ++++++++++++++++++++++++++ apis/cloudflare/src/env.ts | 1 + apis/cloudflare/src/proxy.ts | 50 ++++++++++++---- apis/cloudflare/wrangler-template.toml | 2 + packages/proxy/edge/index.ts | 6 +- packages/proxy/src/proxy.ts | 74 +++++++++++++++++++++++ 6 files changed, 203 insertions(+), 12 deletions(-) create mode 100644 apis/cloudflare/src/billing.ts diff --git a/apis/cloudflare/src/billing.ts b/apis/cloudflare/src/billing.ts new file mode 100644 index 00000000..02a55b8a --- /dev/null +++ b/apis/cloudflare/src/billing.ts @@ -0,0 +1,82 @@ +import { type BillingEvent } from "@braintrust/proxy"; + +const DEFAULT_BILLING_TELEMETRY_URL = + "https://api.braintrust.dev/billing/telemetry/ingest"; + +function buildPayloadEvent(event: BillingEvent) { + if (!event.org_id) { + console.warn("billing event skipped: missing org_id"); + return null; + } + if (!event.model) { + console.warn("billing event skipped: missing model"); + return null; + } + if (!event.resolved_model) { + console.warn("billing event skipped: missing resolved_model"); + return null; + } + const hasTokenUsageData = + event.input_tokens !== undefined || + event.output_tokens !== undefined || + event.cached_input_tokens !== undefined || + event.cache_write_input_tokens !== undefined; + if (!hasTokenUsageData) { + console.warn("billing event skipped: missing token usage"); + return null; + } + const requestId = crypto.randomUUID(); + const timestamp = new Date().toISOString(); + + return { + event_name: "NativeInferenceTokenUsageEvent", + external_customer_id: event.org_id, + timestamp, + idempotency_key: requestId, + properties: { + model: event.model, + resolved_model: event.resolved_model, + org_id: event.org_id, + input_tokens: event.input_tokens, + output_tokens: event.output_tokens, + 
cached_input_tokens: event.cached_input_tokens, + cache_write_input_tokens: event.cache_write_input_tokens, + }, + }; +} + +export async function sendBillingTelemetryEvent({ + telemetryUrl, + event, +}: { + telemetryUrl?: string; + event: BillingEvent; +}): Promise { + try { + const payloadEvent = buildPayloadEvent(event); + if (!payloadEvent) { + return; + } + + const destination = telemetryUrl || DEFAULT_BILLING_TELEMETRY_URL; + const response = await fetch(destination, { + method: "POST", + headers: { + Authorization: `Bearer ${event.auth_token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + events: [payloadEvent], + }), + }); + + if (!response.ok) { + const responseBody = await response.text(); + console.warn( + `billing event failed: ${response.status} ${response.statusText} ${responseBody}`, + ); + } + } catch (error) { + console.warn("billing event threw an error", error); + } +} diff --git a/apis/cloudflare/src/env.ts b/apis/cloudflare/src/env.ts index 5fdb164f..ac9a3eb5 100644 --- a/apis/cloudflare/src/env.ts +++ b/apis/cloudflare/src/env.ts @@ -4,6 +4,7 @@ declare global { BRAINTRUST_APP_URL: string; WHITELISTED_ORIGINS?: string; METRICS_LICENSE_KEY?: string; + BILLING_TELEMETRY_URL?: string; NATIVE_INFERENCE_SECRET_KEY?: string; } } diff --git a/apis/cloudflare/src/proxy.ts b/apis/cloudflare/src/proxy.ts index 5eb614da..5c74d961 100644 --- a/apis/cloudflare/src/proxy.ts +++ b/apis/cloudflare/src/proxy.ts @@ -19,6 +19,7 @@ import { BT_PARENT, resolveParentHeader } from "braintrust/util"; import { cachedLogin, makeProxySpanLogger } from "./tracing"; import { MeterProvider } from "@opentelemetry/sdk-metrics"; import { Meter, Attributes, Histogram } from "@opentelemetry/api"; +import { sendBillingTelemetryEvent } from "./billing"; export type LogHistogramFn = (args: { name: string; @@ -117,6 +118,30 @@ export async function handleProxyV1( let span: Span | undefined; let spanId: string | undefined; let spanExport: string | undefined; 
+ let billingOrgId: string | undefined; + const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined; + const apiKey = + parseAuthHeader({ + authorization: request.headers.get("authorization") ?? undefined, + }) ?? undefined; + + const getLoginState = async () => + cachedLogin({ + appUrl: braintrustAppUrl(env).toString(), + apiKey, + orgName, + cache: credentialsCache, + }); + + if (apiKey) { + try { + const loginState = await getLoginState(); + billingOrgId = loginState.orgId ?? undefined; + } catch (error) { + console.warn("Failed to resolve billing org id", error); + } + } + const parentHeader = request.headers.get(BT_PARENT); if (parentHeader) { let parent; @@ -131,19 +156,11 @@ export async function handleProxyV1( ); } - const orgName = request.headers.get(ORG_NAME_HEADER) ?? undefined; - const apiKey = - parseAuthHeader({ - authorization: request.headers.get("authorization") ?? undefined, - }) ?? undefined; + const loginState = await getLoginState(); + billingOrgId = loginState.orgId ?? 
undefined; span = startSpan({ - state: await cachedLogin({ - appUrl: braintrustAppUrl(env).toString(), - apiKey, - orgName, - cache: credentialsCache, - }), + state: loginState, type: "llm", name: "LLM", parent: parent.toStr(), @@ -199,6 +216,17 @@ export async function handleProxyV1( spanLogger, spanId, spanExport, + billingOrgId, + onBillingEvent: (event) => { + ctx.waitUntil( + sendBillingTelemetryEvent({ + telemetryUrl: env.BILLING_TELEMETRY_URL, + event, + }).catch((error) => { + console.warn("billing waitUntil task failed", error); + }), + ); + }, nativeInferenceSecretKey: env.NATIVE_INFERENCE_SECRET_KEY, }; diff --git a/apis/cloudflare/wrangler-template.toml b/apis/cloudflare/wrangler-template.toml index 47b3ef4b..140710cc 100644 --- a/apis/cloudflare/wrangler-template.toml +++ b/apis/cloudflare/wrangler-template.toml @@ -28,10 +28,12 @@ head_sampling_rate = 0.2 # You should not need to edit this BRAINTRUST_APP_URL = "https://www.braintrust.dev" METRICS_LICENSE_KEY="" +BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest" [env.staging.vars] BRAINTRUST_APP_URL = "https://www.braintrust.dev" METRICS_LICENSE_KEY="" +BILLING_TELEMETRY_URL="https://api.braintrust.dev/billing/telemetry/ingest" [env.staging] kv_namespaces = [ diff --git a/packages/proxy/edge/index.ts b/packages/proxy/edge/index.ts index a577651b..0a7f5e83 100644 --- a/packages/proxy/edge/index.ts +++ b/packages/proxy/edge/index.ts @@ -1,6 +1,6 @@ import { DEFAULT_BRAINTRUST_APP_URL } from "@lib/constants"; import { flushMetrics } from "@lib/metrics"; -import { proxyV1, SpanLogger, LogHistogramFn } from "@lib/proxy"; +import { proxyV1, SpanLogger, LogHistogramFn, BillingEvent } from "@lib/proxy"; import { isEmpty } from "@lib/util"; import { MeterProvider } from "@opentelemetry/sdk-metrics"; @@ -36,6 +36,8 @@ export interface ProxyOpts { logHistogram?: LogHistogramFn; whitelist?: (string | RegExp)[]; spanLogger?: SpanLogger; + billingOrgId?: string; + onBillingEvent?: (event: 
BillingEvent) => void; spanId?: string; spanExport?: string; nativeInferenceSecretKey?: string; @@ -398,6 +400,8 @@ export function EdgeProxyV1(opts: ProxyOpts) { digest: digestMessage, logHistogram: opts.logHistogram, spanLogger: opts.spanLogger, + billingOrgId: opts.billingOrgId, + onBillingEvent: opts.onBillingEvent, }); } catch (e) { return new Response(`${e}`, { diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts index 78a80d1b..eb37f37f 100644 --- a/packages/proxy/src/proxy.ts +++ b/packages/proxy/src/proxy.ts @@ -190,6 +190,19 @@ export interface SpanLogger { reportProgress: (progress: string) => void; } +export type BillingEvent = { + event_name: "NativeInferenceTokenUsageEvent"; + auth_token: string; + org_id?: string; + model?: string | null; + resolved_model?: string | null; + org_name?: string; + input_tokens?: number; + output_tokens?: number; + cached_input_tokens?: number; + cache_write_input_tokens?: number; +}; + // This is an isomorphic implementation of proxyV1, which is used by both edge functions // in CloudFlare and by the node proxy (locally and in lambda). export async function proxyV1({ @@ -208,6 +221,8 @@ export async function proxyV1({ cacheKeyOptions = {}, decompressFetch = false, spanLogger, + billingOrgId, + onBillingEvent, signal, fetch = globalThis.fetch, }: { @@ -237,6 +252,8 @@ export async function proxyV1({ cacheKeyOptions?: CacheKeyOptions; decompressFetch?: boolean; spanLogger?: SpanLogger; + billingOrgId?: string; + onBillingEvent?: (event: BillingEvent) => void; signal?: AbortSignal; fetch?: FetchFn; }): Promise { @@ -299,6 +316,7 @@ export async function proxyV1({ ); let orgName: string | undefined = proxyHeaders[ORG_NAME_HEADER] ?? undefined; + let resolvedOrgName: string | undefined = orgName; const projectId: string | undefined = proxyHeaders[PROJECT_ID_HEADER] ?? 
undefined; @@ -649,6 +667,7 @@ export async function proxyV1({ if (secrets.length > 0 && !orgName && secrets[0].org_name) { baseAttributes.org_name = secrets[0].org_name; + resolvedOrgName = secrets[0].org_name; } logRequest(); @@ -759,6 +778,11 @@ export async function proxyV1({ if (stream) { let first = true; const allChunks: Uint8Array[] = []; + let resolvedModel: string | undefined = undefined; + let inputTokens: number | undefined = undefined; + let outputTokens: number | undefined = undefined; + let cachedInputTokens: number | undefined = undefined; + let cacheWriteInputTokens: number | undefined = undefined; // These parameters are for the streaming case let reasoning: OpenAIReasoning[] | undefined = undefined; @@ -787,10 +811,20 @@ export async function proxyV1({ | OpenAIChatCompletionChunk | undefined; if (result) { + if (typeof result.model === "string" && result.model) { + resolvedModel = result.model; + } const extendedUsage = completionUsageSchema.safeParse( result.usage, ); if (extendedUsage.success) { + inputTokens = extendedUsage.data.prompt_tokens; + outputTokens = extendedUsage.data.completion_tokens; + cachedInputTokens = + extendedUsage.data.prompt_tokens_details?.cached_tokens; + cacheWriteInputTokens = + extendedUsage.data.prompt_tokens_details + ?.cache_creation_tokens; spanLogger?.log({ metrics: { tokens: extendedUsage.data.total_tokens, @@ -978,10 +1012,20 @@ export async function proxyV1({ case "chat": case "completion": { const data = dataRaw as ChatCompletion; + if (typeof data.model === "string" && data.model) { + resolvedModel = data.model; + } const extendedUsage = completionUsageSchema.safeParse( data.usage, ); if (extendedUsage.success) { + inputTokens = extendedUsage.data.prompt_tokens; + outputTokens = extendedUsage.data.completion_tokens; + cachedInputTokens = + extendedUsage.data.prompt_tokens_details?.cached_tokens; + cacheWriteInputTokens = + extendedUsage.data.prompt_tokens_details + ?.cache_creation_tokens; spanLogger?.log({ 
output: data.choices, metrics: { @@ -1041,6 +1085,15 @@ export async function proxyV1({ } case "response": { const data = dataRaw as OpenAIResponse; + if (typeof data.model === "string" && data.model) { + resolvedModel = data.model; + } + if (data.usage) { + inputTokens = data.usage.input_tokens; + outputTokens = data.usage.output_tokens; + cachedInputTokens = + data.usage.input_tokens_details?.cached_tokens; + } spanLogger?.log({ output: data.output, metrics: { @@ -1089,6 +1142,27 @@ export async function proxyV1({ }); spanLogger?.end(); + if (!responseFailed) { + try { + if (typeof onBillingEvent !== "function") { + return; + } + onBillingEvent({ + event_name: "NativeInferenceTokenUsageEvent", + auth_token: authToken, + org_id: billingOrgId, + model, + resolved_model: resolvedModel, + org_name: resolvedOrgName, + input_tokens: inputTokens, + output_tokens: outputTokens, + cached_input_tokens: cachedInputTokens, + cache_write_input_tokens: cacheWriteInputTokens, + }); + } catch (error) { + console.warn("billing callback failed", error); + } + } controller.terminate(); }, }); From b8426af7692e79f508d44fcf4e5b329bb8f0f211 Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Wed, 1 Apr 2026 20:10:36 -0400 Subject: [PATCH 2/5] add brain model guard --- apis/cloudflare/src/billing.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apis/cloudflare/src/billing.ts b/apis/cloudflare/src/billing.ts index 02a55b8a..8398a96c 100644 --- a/apis/cloudflare/src/billing.ts +++ b/apis/cloudflare/src/billing.ts @@ -3,6 +3,10 @@ import { type BillingEvent } from "@braintrust/proxy"; const DEFAULT_BILLING_TELEMETRY_URL = "https://api.braintrust.dev/billing/telemetry/ingest"; +function isBrainModel(model: string): boolean { + return model.startsWith("brain-"); +} + function buildPayloadEvent(event: BillingEvent) { if (!event.org_id) { console.warn("billing event skipped: missing org_id"); @@ -12,6 +16,10 @@ function buildPayloadEvent(event: BillingEvent) { console.warn("billing 
event skipped: missing model"); return null; } + // Skip non-brain models since braintrust only hosts brain models. + if (!isBrainModel(event.model)) { + return null; + } if (!event.resolved_model) { console.warn("billing event skipped: missing resolved_model"); return null; From d3880ec4893e3ecff5f22cf56a636674a968edff Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Thu, 2 Apr 2026 13:25:41 -0400 Subject: [PATCH 3/5] move brain model call up --- apis/cloudflare/src/billing.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/apis/cloudflare/src/billing.ts b/apis/cloudflare/src/billing.ts index 8398a96c..48d7aefc 100644 --- a/apis/cloudflare/src/billing.ts +++ b/apis/cloudflare/src/billing.ts @@ -8,10 +8,6 @@ function isBrainModel(model: string): boolean { } function buildPayloadEvent(event: BillingEvent) { - if (!event.org_id) { - console.warn("billing event skipped: missing org_id"); - return null; - } if (!event.model) { console.warn("billing event skipped: missing model"); return null; @@ -20,6 +16,11 @@ function buildPayloadEvent(event: BillingEvent) { if (!isBrainModel(event.model)) { return null; } + + if (!event.org_id) { + console.warn("billing event skipped: missing org_id"); + return null; + } if (!event.resolved_model) { console.warn("billing event skipped: missing resolved_model"); return null; From b15dfb2bea7291a1733898ef876c06fa99edd8b2 Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Mon, 6 Apr 2026 23:33:47 -0400 Subject: [PATCH 4/5] check via braintrust secret type --- apis/cloudflare/src/billing.ts | 7 +--- packages/proxy/src/proxy.ts | 7 ++++ packages/proxy/src/util.test.ts | 62 ++++++++++++++++++++++++++++++++- packages/proxy/src/util.ts | 22 ++++++++++++ 4 files changed, 91 insertions(+), 7 deletions(-) diff --git a/apis/cloudflare/src/billing.ts b/apis/cloudflare/src/billing.ts index 48d7aefc..fa51211b 100644 --- a/apis/cloudflare/src/billing.ts +++ b/apis/cloudflare/src/billing.ts @@ -3,17 +3,12 @@ import { type 
BillingEvent } from "@braintrust/proxy"; const DEFAULT_BILLING_TELEMETRY_URL = "https://api.braintrust.dev/billing/telemetry/ingest"; -function isBrainModel(model: string): boolean { - return model.startsWith("brain-"); -} - function buildPayloadEvent(event: BillingEvent) { if (!event.model) { console.warn("billing event skipped: missing model"); return null; } - // Skip non-brain models since braintrust only hosts brain models. - if (!isBrainModel(event.model)) { + if (!event.isNativeInference) { return null; } diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts index eb37f37f..91a1b43b 100644 --- a/packages/proxy/src/proxy.ts +++ b/packages/proxy/src/proxy.ts @@ -102,6 +102,7 @@ import { flattenChunksArray, getRandomInt, isEmpty, + isNativeInferenceSecret, isObject, ModelResponse, parseAuthHeader, @@ -194,6 +195,7 @@ export type BillingEvent = { event_name: "NativeInferenceTokenUsageEvent"; auth_token: string; org_id?: string; + isNativeInference?: boolean; model?: string | null; resolved_model?: string | null; org_name?: string; @@ -1151,6 +1153,7 @@ export async function proxyV1({ event_name: "NativeInferenceTokenUsageEvent", auth_token: authToken, org_id: billingOrgId, + isNativeInference: nativeInferenceSecret !== undefined, model, resolved_model: resolvedModel, org_name: resolvedOrgName, @@ -1330,6 +1333,10 @@ async function fetchModelLoop( } const initialIdx = getRandomInt(secrets.length); + const nativeInferenceSecret = + model === null || model === undefined + ? 
undefined + : secrets.find((secret) => isNativeInferenceSecret(secret, model)); let proxyResponse: ModelResponse | null = null; let secretName: string | null | undefined = null; let lastException = null; diff --git a/packages/proxy/src/util.test.ts b/packages/proxy/src/util.test.ts index 5d047fa2..4e4299e5 100644 --- a/packages/proxy/src/util.test.ts +++ b/packages/proxy/src/util.test.ts @@ -1,5 +1,10 @@ import { describe, expect, test } from "vitest"; -import { parseFileMetadataFromUrl, _urljoin } from "./util"; +import { type APISecret } from "@schema"; +import { + isNativeInferenceSecret, + parseFileMetadataFromUrl, + _urljoin, +} from "./util"; describe("parseFileMetadataFromUrl", () => { test("handles basic URLs", () => { @@ -222,3 +227,58 @@ test("_urljoin", () => { expect(_urljoin()).toBe(""); expect(_urljoin("a")).toBe("a"); }); + +describe("isNativeInferenceSecret", () => { + test("treats braintrust secrets without custom models as native inference", () => { + const secret: APISecret = { + type: "braintrust", + secret: "test-secret", + metadata: { + api_base: "http://localhost:8001/native-inference-test", + }, + }; + + expect(isNativeInferenceSecret(secret, "brain-test-native-1")).toBe(true); + }); + + test("treats custom model secrets as native inference only for matching models", () => { + const secret: APISecret = { + type: "openai", + secret: "test-secret", + metadata: { + customModels: { + "brain-test-native-1": { + format: "openai", + flavor: "chat", + }, + }, + }, + }; + + expect(isNativeInferenceSecret(secret, "brain-test-native-1")).toBe(true); + expect(isNativeInferenceSecret(secret, "brain-test-native-2")).toBe(false); + }); + + test("treats non-braintrust secrets without matching custom models as non-native", () => { + const secret: APISecret = { + type: "openai", + secret: "test-secret", + metadata: { + api_base: "https://api.openai.com/v1", + }, + }; + + expect(isNativeInferenceSecret(secret, "brain-test-native-1")).toBe(false); + }); + + 
test("returns false when the model is missing", () => { + const secret: APISecret = { + type: "braintrust", + secret: "test-secret", + metadata: null, + }; + + expect(isNativeInferenceSecret(secret, null)).toBe(false); + expect(isNativeInferenceSecret(secret, undefined)).toBe(false); + }); +}); diff --git a/packages/proxy/src/util.ts b/packages/proxy/src/util.ts index 5fc68b46..1c824fbe 100644 --- a/packages/proxy/src/util.ts +++ b/packages/proxy/src/util.ts @@ -1,4 +1,5 @@ import contentDisposition from "content-disposition"; +import { type APISecret } from "@schema"; export interface ModelResponse { stream: ReadableStream | null; response: Response; @@ -79,6 +80,27 @@ export function isEmpty(a: any): a is null | undefined { return a === undefined || a === null; } +export function isNativeInferenceSecret( + secret: APISecret, + model: string | null | undefined, +): boolean { + if (model === null || model === undefined) { + return false; + } + + const customModels = secret.metadata?.customModels; + if ( + customModels === null || + customModels === undefined || + typeof customModels !== "object" || + Array.isArray(customModels) + ) { + return secret.type === "braintrust"; + } + + return Object.prototype.hasOwnProperty.call(customModels, model); +} + export function getRandomInt(max: number) { return Math.floor(Math.random() * max); } From 4053567be43908682eb6f47a8be50aa6f835ca10 Mon Sep 17 00:00:00 2001 From: Ken Jiang Date: Tue, 7 Apr 2026 10:16:20 -0400 Subject: [PATCH 5/5] fix --- packages/proxy/src/proxy.ts | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/packages/proxy/src/proxy.ts b/packages/proxy/src/proxy.ts index 91a1b43b..fa090ab1 100644 --- a/packages/proxy/src/proxy.ts +++ b/packages/proxy/src/proxy.ts @@ -588,6 +588,10 @@ export async function proxyV1({ } let responseFailed = false; + let isNativeInference = false; + let secretName: string | null | undefined = undefined; + let proxyResponse: Response | 
undefined = undefined; + let proxyStream: ReadableStream | null = null; const overridenHeaders: string[] = []; const setOverriddenHeader = (name: string, value: string) => { @@ -612,10 +616,7 @@ export async function proxyV1({ ); } - const { - modelResponse: { response: proxyResponse, stream: proxyStream }, - secretName, - } = await fetchModelLoop( + const fetchResult = await fetchModelLoop( logHistogram, method, url, @@ -690,6 +691,10 @@ export async function proxyV1({ signal, fetch, ); + proxyResponse = fetchResult.modelResponse.response; + proxyStream = fetchResult.modelResponse.stream; + secretName = fetchResult.secretName; + isNativeInference = fetchResult.isNativeInference; stream = proxyStream; if (!proxyResponse.ok) { @@ -1153,7 +1158,7 @@ export async function proxyV1({ event_name: "NativeInferenceTokenUsageEvent", auth_token: authToken, org_id: billingOrgId, - isNativeInference: nativeInferenceSecret !== undefined, + isNativeInference, model, resolved_model: resolvedModel, org_name: resolvedOrgName, @@ -1279,7 +1284,11 @@ async function fetchModelLoop( model: string | null, signal: AbortSignal | undefined, fetch: FetchFn, -): Promise<{ modelResponse: ModelResponse; secretName?: string | null }> { +): Promise<{ + modelResponse: ModelResponse; + secretName?: string | null; + isNativeInference: boolean; +}> { // model is now passed as a parameter // TODO: Make this smarter. For now, just pick a random one. @@ -1634,6 +1643,7 @@ async function fetchModelLoop( response: proxyResponse.response, }, secretName, + isNativeInference: nativeInferenceSecret !== undefined, }; }