Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
285 changes: 278 additions & 7 deletions src/api_type/anthropic.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::collections::HashMap;

use async_trait::async_trait;
use axum::body::Body;
use serde::Deserialize;
Expand All @@ -9,10 +11,53 @@ use crate::request_metadata::RequestInspectionMetadata;

use super::{ApiTypeHandler, Inspector, ResponseMetadata, ResponseMetadataInspector};

#[derive(Debug, Deserialize)]
struct CacheCreation {
ephemeral_5m_input_tokens: Option<u64>,
ephemeral_1h_input_tokens: Option<u64>,
}

#[derive(Debug, Deserialize)]
struct Usage {
input_tokens: Option<u64>,
output_tokens: Option<u64>,
cache_creation_input_tokens: Option<u64>,
cache_read_input_tokens: Option<u64>,
cache_creation: Option<CacheCreation>,
}

fn build_cache_creation_map(
usage: &Usage,
cache_ttl_hint: Option<u64>,
) -> Option<HashMap<String, u64>> {
if let Some(cc) = &usage.cache_creation {
let mut map = HashMap::new();
if let Some(tokens) = cc.ephemeral_5m_input_tokens
&& tokens > 0
{
map.insert("5m".to_owned(), tokens);
}
if let Some(tokens) = cc.ephemeral_1h_input_tokens
&& tokens > 0
{
map.insert("1h".to_owned(), tokens);
}
if map.is_empty() {
return None;
} else {
return Some(map);
}
} else if let Some(tokens) = usage.cache_creation_input_tokens
&& tokens > 0
{
let duration = match cache_ttl_hint {
Some(300) => "5m",
Some(3600) => "1h",
_ => "unknown",
};
return Some(HashMap::from([(duration.to_owned(), tokens)]));
}
None
}

// Payload for both non-streaming and SSE.
Expand All @@ -27,9 +72,60 @@ struct MessageStartData {
message: AnthropicDataWithUsage,
}

#[derive(Debug, Deserialize)]
struct Message {
content: Option<serde_json::Value>,
}

#[derive(Debug, Deserialize)]
struct AnthropicRequestBody {
model: Option<String>,
system: Option<serde_json::Value>,
messages: Option<Vec<Message>>,
}

/// Collect cache_control TTL values from a JSON value that may be a single
/// content block or an array of content blocks.
fn collect_cache_ttls(value: &serde_json::Value, ttls: &mut Vec<u64>) {
let blocks: Vec<&serde_json::Value> = if let Some(arr) = value.as_array() {
arr.iter().collect()
} else if value.is_object() {
vec![value]
} else {
return;
};
for block in blocks {
if let Some(cc) = block.get("cache_control") {
// cache_control block present → default TTL is 300s when ttl field absent
let ttl = cc.get("ttl").and_then(|v| v.as_u64()).unwrap_or(300);
ttls.push(ttl);
}
}
}

/// Extract a uniform cache TTL from the request body.
/// Returns Some(ttl) if all cache_control blocks share the same TTL, None if mixed or absent.
fn extract_cache_ttl(body: &AnthropicRequestBody) -> Option<u64> {
let mut ttls = Vec::new();
if let Some(system) = &body.system {
collect_cache_ttls(system, &mut ttls);
}
if let Some(messages) = &body.messages {
for msg in messages {
if let Some(content) = &msg.content {
collect_cache_ttls(content, &mut ttls);
}
}
}
if ttls.is_empty() {
return None;
}
let first = ttls[0];
if ttls.iter().all(|&t| t == first) {
Some(first)
} else {
None
}
}

pub struct AnthropicMessagesHandler;
Expand Down Expand Up @@ -59,7 +155,13 @@ impl ApiTypeHandler for AnthropicMessagesHandler {
}
};
let metadata = match serde_json::from_slice::<AnthropicRequestBody>(&bytes) {
Ok(body) => RequestInspectionMetadata { model: body.model },
Ok(body) => {
let cache_ttl_secs = extract_cache_ttl(&body);
RequestInspectionMetadata {
model: body.model,
cache_ttl_secs,
}
}
Err(e) => {
tracing::error!("Failed to parse Anthropic request body: {e}");
RequestInspectionMetadata::default()
Expand All @@ -73,13 +175,17 @@ impl ApiTypeHandler for AnthropicMessagesHandler {
&self,
_status: u16,
headers: &http::HeaderMap,
request_metadata: &RequestInspectionMetadata,
) -> ResponseMetadataInspector {
if is_event_stream(headers) {
Box::new(ProtocolInspector::new(
SseProtocol::new(),
AnthropicSseInspector {
input_tokens: None,
output_tokens: None,
cache_creation_tokens: None,
cache_read_input_tokens: None,
cache_ttl_hint: request_metadata.cache_ttl_secs,
},
))
} else {
Expand All @@ -98,10 +204,12 @@ fn is_event_stream(headers: &http::HeaderMap) -> bool {
.is_some_and(|ct| ct.starts_with("text/event-stream"))
}

#[derive(Default)]
pub(crate) struct AnthropicSseInspector {
pub(crate) input_tokens: Option<u64>,
pub(crate) output_tokens: Option<u64>,
pub(crate) cache_creation_tokens: Option<HashMap<String, u64>>,
pub(crate) cache_read_input_tokens: Option<u64>,
pub(crate) cache_ttl_hint: Option<u64>,
}

#[derive(Debug, Deserialize)]
Expand All @@ -123,6 +231,9 @@ impl AnthropicSseInspector {
"message_start" => {
if let Ok(msg) = serde_json::from_str::<MessageStartData>(data) {
self.input_tokens = msg.message.usage.input_tokens;
self.cache_creation_tokens =
build_cache_creation_map(&msg.message.usage, self.cache_ttl_hint);
self.cache_read_input_tokens = msg.message.usage.cache_read_input_tokens;
}
}
"message_delta" => {
Expand All @@ -146,11 +257,12 @@ impl Inspector<SseEvent> for AnthropicSseInspector {
if self.input_tokens.is_none() && self.output_tokens.is_none() {
return Err(anyhow::anyhow!("no token usage found in SSE stream"));
}
let response_metadata = ResponseMetadata {
Ok(ResponseMetadata {
input_tokens: self.input_tokens,
output_tokens: self.output_tokens,
};
Ok(response_metadata)
cache_creation_tokens: self.cache_creation_tokens,
cache_read_input_tokens: self.cache_read_input_tokens,
})
}
}

Expand All @@ -173,9 +285,12 @@ impl Inspector<TextBody> for AnthropicJsonInspector {

fn parse_anthropic_json(data: &[u8]) -> Result<ResponseMetadata, anyhow::Error> {
let parsed = serde_json::from_slice::<AnthropicDataWithUsage>(data)?;
let cache_creation_tokens = build_cache_creation_map(&parsed.usage, None);
Ok(ResponseMetadata {
input_tokens: parsed.usage.input_tokens,
output_tokens: parsed.usage.output_tokens,
cache_creation_tokens,
cache_read_input_tokens: parsed.usage.cache_read_input_tokens,
})
}

Expand All @@ -191,16 +306,31 @@ mod tests {
}

fn make_json_inspector() -> ResponseMetadataInspector {
AnthropicMessagesHandler.response_inspector(200, &http::HeaderMap::new())
AnthropicMessagesHandler.response_inspector(
200,
&http::HeaderMap::new(),
&RequestInspectionMetadata::default(),
)
}

fn make_sse_inspector() -> ResponseMetadataInspector {
make_sse_inspector_with_hint(None)
}

fn make_sse_inspector_with_hint(cache_ttl_secs: Option<u64>) -> ResponseMetadataInspector {
let mut headers = http::HeaderMap::new();
headers.insert(
http::header::CONTENT_TYPE,
"text/event-stream".parse().unwrap(),
);
AnthropicMessagesHandler.response_inspector(200, &headers)
AnthropicMessagesHandler.response_inspector(
200,
&headers,
&RequestInspectionMetadata {
cache_ttl_secs,
..Default::default()
},
)
}

#[tokio::test]
Expand Down Expand Up @@ -311,4 +441,145 @@ data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"outpu
assert_eq!(metadata.input_tokens, Some(25));
assert_eq!(metadata.output_tokens, Some(150));
}

// --- Cache token tests ---
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe move to another module?


#[tokio::test]
async fn inspect_request_uniform_ttl() {
let body = br#"{
"model": "claude-sonnet-4-20250514",
"system": [{"type": "text", "text": "You are helpful.", "cache_control": {"type": "ephemeral", "ttl": 3600}}],
"messages": [{"role": "user", "content": [{"type": "text", "text": "Hi", "cache_control": {"type": "ephemeral", "ttl": 3600}}]}]
}"#;
let request = make_request(body);
let (result, _) = AnthropicMessagesHandler.inspect_request(request).await;
let metadata = result.unwrap();
assert_eq!(metadata.cache_ttl_secs, Some(3600));
}

#[tokio::test]
async fn inspect_request_mixed_ttls() {
let body = br#"{
"model": "claude-sonnet-4-20250514",
"system": [{"type": "text", "text": "You are helpful.", "cache_control": {"type": "ephemeral", "ttl": 300}}],
"messages": [{"role": "user", "content": [{"type": "text", "text": "Hi", "cache_control": {"type": "ephemeral", "ttl": 3600}}]}]
}"#;
let request = make_request(body);
let (result, _) = AnthropicMessagesHandler.inspect_request(request).await;
let metadata = result.unwrap();
assert_eq!(metadata.cache_ttl_secs, None);
}

#[tokio::test]
async fn inspect_request_default_ttl() {
let body = br#"{
"model": "claude-sonnet-4-20250514",
"system": [{"type": "text", "text": "You are helpful.", "cache_control": {"type": "ephemeral"}}]
}"#;
let request = make_request(body);
let (result, _) = AnthropicMessagesHandler.inspect_request(request).await;
let metadata = result.unwrap();
assert_eq!(metadata.cache_ttl_secs, Some(300));
}

#[tokio::test]
async fn inspect_request_no_cache_control() {
let body = br#"{
"model": "claude-sonnet-4-20250514",
"messages": [{"role": "user", "content": "Hi"}]
}"#;
let request = make_request(body);
let (result, _) = AnthropicMessagesHandler.inspect_request(request).await;
let metadata = result.unwrap();
assert_eq!(metadata.cache_ttl_secs, None);
}

#[test]
fn inspect_json_cache_creation_breakdown() {
let body = br#"{
"id": "msg_123",
"type": "message",
"usage": {
"input_tokens": 100,
"output_tokens": 50,
"cache_creation_input_tokens": 348,
"cache_read_input_tokens": 1800,
"cache_creation": {
"ephemeral_5m_input_tokens": 248,
"ephemeral_1h_input_tokens": 100
}
}
}"#;
let mut inspector = make_json_inspector();
inspector.feed(body);
let metadata = inspector.finish().unwrap();
let map = metadata.cache_creation_tokens.unwrap();
assert_eq!(map.get("5m"), Some(&248));
assert_eq!(map.get("1h"), Some(&100));
assert_eq!(metadata.cache_read_input_tokens, Some(1800));
}

#[test]
fn inspect_json_no_cache_fields() {
let body = br#"{
"id": "msg_123",
"type": "message",
"usage": {"input_tokens": 25, "output_tokens": 150}
}"#;
let mut inspector = make_json_inspector();
inspector.feed(body);
let metadata = inspector.finish().unwrap();
assert_eq!(metadata.cache_creation_tokens, None);
assert_eq!(metadata.cache_read_input_tokens, None);
}

#[test]
fn inspect_sse_cache_with_ttl_hint_5m() {
let body = br#"event: message_start
data: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-20250514","usage":{"input_tokens":25,"output_tokens":1,"cache_creation_input_tokens":348,"cache_read_input_tokens":0}}}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":150}}

"#;
let mut inspector = make_sse_inspector_with_hint(Some(300));
inspector.feed(body);
let metadata = inspector.finish().unwrap();
let map = metadata.cache_creation_tokens.unwrap();
assert_eq!(map.get("5m"), Some(&348));
assert_eq!(metadata.cache_read_input_tokens, Some(0));
}

#[test]
fn inspect_sse_cache_with_ttl_hint_1h() {
let body = br#"event: message_start
data: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-20250514","usage":{"input_tokens":25,"output_tokens":1,"cache_creation_input_tokens":500,"cache_read_input_tokens":200}}}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":10}}

"#;
let mut inspector = make_sse_inspector_with_hint(Some(3600));
inspector.feed(body);
let metadata = inspector.finish().unwrap();
let map = metadata.cache_creation_tokens.unwrap();
assert_eq!(map.get("1h"), Some(&500));
assert_eq!(metadata.cache_read_input_tokens, Some(200));
}

#[test]
fn inspect_sse_cache_no_ttl_hint() {
let body = br#"event: message_start
data: {"type":"message_start","message":{"id":"msg_123","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-20250514","usage":{"input_tokens":25,"output_tokens":1,"cache_creation_input_tokens":348,"cache_read_input_tokens":0}}}

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":150}}

"#;
let mut inspector = make_sse_inspector_with_hint(None);
inspector.feed(body);
let metadata = inspector.finish().unwrap();
let map = metadata.cache_creation_tokens.unwrap();
assert_eq!(map.get("unknown"), Some(&348));
}
}
Loading
Loading