Skip to content

Commit 7906cc2

Browse files
authored
fix(anthropic): move ephemeral cache creation fields to metadata (#205)
The ephemeral cache creation breakdown fields (ephemeral_5m_input_tokens, ephemeral_1h_input_tokens) were being recorded as metrics, which would have required adding them to the standard metrics allowlists. Move them to span metadata instead (as cache_creation_ephemeral_5m_input_tokens and cache_creation_ephemeral_1h_input_tokens), since they are informational breakdowns of the already-tracked prompt_cache_creation_tokens metric. This fixes the CI failure in test_standard_metrics_crawl, where the integration tests reject unrecognized standard metric names.
1 parent 0ced496 commit 7906cc2

2 files changed

Lines changed: 15 additions & 14 deletions

File tree

py/src/braintrust/integrations/anthropic/_utils.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ def __getattr__(self, name: str) -> Any:
2222
("cache_creation_input_tokens", "prompt_cache_creation_tokens"),
2323
)
2424

25-
_ANTHROPIC_CACHE_CREATION_METRIC_FIELDS = (
26-
("ephemeral_5m_input_tokens", "prompt_cache_creation_ephemeral_5m_tokens"),
27-
("ephemeral_1h_input_tokens", "prompt_cache_creation_ephemeral_1h_tokens"),
25+
_ANTHROPIC_CACHE_CREATION_METADATA_FIELDS = (
26+
("ephemeral_5m_input_tokens", "cache_creation_ephemeral_5m_input_tokens"),
27+
("ephemeral_1h_input_tokens", "cache_creation_ephemeral_1h_input_tokens"),
2828
)
2929

3030
_ANTHROPIC_USAGE_METADATA_FIELDS = frozenset(
@@ -76,16 +76,17 @@ def extract_anthropic_usage(usage: Any) -> tuple[dict[str, float], dict[str, Any
7676
return {}, {}
7777

7878
metrics: dict[str, float] = {}
79+
metadata: dict[str, Any] = {}
7980
for source_name, metric_name in _ANTHROPIC_USAGE_METRIC_FIELDS:
8081
_set_numeric_metric(metrics, metric_name, usage.get(source_name))
8182

8283
cache_creation = _try_to_dict(usage.get("cache_creation"))
8384
cache_creation_breakdown: list[float] = []
8485
if cache_creation is not None:
85-
for source_name, metric_name in _ANTHROPIC_CACHE_CREATION_METRIC_FIELDS:
86+
for source_name, metadata_key in _ANTHROPIC_CACHE_CREATION_METADATA_FIELDS:
8687
value = cache_creation.get(source_name)
87-
_set_numeric_metric(metrics, metric_name, value)
8888
if is_numeric(value):
89+
metadata[metadata_key] = int(value)
8990
cache_creation_breakdown.append(float(value))
9091

9192
server_tool_use = _try_to_dict(usage.get("server_tool_use"))
@@ -105,9 +106,7 @@ def extract_anthropic_usage(usage: Any) -> tuple[dict[str, float], dict[str, Any
105106
metrics["prompt_tokens"] = total_prompt_tokens
106107
metrics["tokens"] = total_prompt_tokens + metrics.get("completion_tokens", 0)
107108

108-
metadata = {
109-
f"usage_{name}": value
110-
for name, value in usage.items()
111-
if name in _ANTHROPIC_USAGE_METADATA_FIELDS and value is not None
112-
}
109+
for name, value in usage.items():
110+
if name in _ANTHROPIC_USAGE_METADATA_FIELDS and value is not None:
111+
metadata[f"usage_{name}"] = value
113112
return metrics, metadata

py/src/braintrust/integrations/anthropic/test_anthropic.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -608,8 +608,8 @@ def test_setup_creates_spans(memory_logger):
608608
)
609609
assert metrics["completion_tokens"] == usage.output_tokens
610610
assert metrics["prompt_cache_creation_tokens"] == usage.cache_creation_input_tokens
611-
assert metrics["prompt_cache_creation_ephemeral_5m_tokens"] == ephemeral_5m
612-
assert metrics["prompt_cache_creation_ephemeral_1h_tokens"] == ephemeral_1h
611+
assert span["metadata"]["cache_creation_ephemeral_5m_input_tokens"] == ephemeral_5m
612+
assert span["metadata"]["cache_creation_ephemeral_1h_input_tokens"] == ephemeral_1h
613613
assert "service_tier" not in metrics
614614

615615

@@ -634,12 +634,14 @@ def test_extract_anthropic_usage_preserves_nested_numeric_fields():
634634
assert metrics["completion_tokens"] == 12
635635
assert metrics["tokens"] == 27
636636
assert metrics["prompt_cache_creation_tokens"] == 7
637-
assert metrics["prompt_cache_creation_ephemeral_5m_tokens"] == 3
638-
assert metrics["prompt_cache_creation_ephemeral_1h_tokens"] == 4
637+
assert metadata["cache_creation_ephemeral_5m_input_tokens"] == 3
638+
assert metadata["cache_creation_ephemeral_1h_input_tokens"] == 4
639639
assert metrics["server_tool_use_web_search_requests"] == 2
640640
assert metrics["server_tool_use_web_fetch_requests"] == 1
641641
assert "service_tier" not in metrics
642642
assert metadata == {
643+
"cache_creation_ephemeral_5m_input_tokens": 3,
644+
"cache_creation_ephemeral_1h_input_tokens": 4,
643645
"usage_service_tier": "standard",
644646
"usage_inference_geo": "not_available",
645647
}

0 commit comments

Comments
 (0)