From 787c50fd7c3f2628ab072e61e94646d01e516212 Mon Sep 17 00:00:00 2001
From: matthiasschalk <schalk.matthias@gmail.com>
Date: Sun, 17 May 2026 06:48:40 +0200
Subject: [PATCH 1/6] fix(types,dashboard): exclude reasoning from token totals
 (ccusage parity)

ccusage never adds reasoning into its total: for Codex the reasoning
tokens are already counted inside output_tokens (OpenAI semantics), and
ccusage's Codex loader uses the reported total_tokens (=== input+output).
totalTokenCount() summed reasoning on top, double-counting every Codex
event in the dashboard total, heatmap, topModel and every chart.

Drop reasoning from totalTokenCount and every client-side token sum in
dashboard.ts. Claude is unaffected (reasoning is always 0 there). The
reasoning field stays on TokenCounts and is shown as a non-additive
informational row. Adds types.test.ts and a Codex parity invariant in
codex.test.ts (totalTokenCount == reported total_tokens for well-formed
logs).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/dashboard.ts          | 19 +++++++++++------
 src/loaders/codex.test.ts | 12 +++++++++++
 src/types.test.ts         | 45 +++++++++++++++++++++++++++++++++++++++
 src/types.ts              |  9 +++++++-
 4 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 src/types.test.ts
diff --git a/src/dashboard.ts b/src/dashboard.ts
index d2c83af..1b8be10 100644
--- a/src/dashboard.ts
+++ b/src/dashboard.ts
@@ -410,7 +410,9 @@ function addTokens(a, b) {
   };
 }
 function sumTokens(tokens) {
-  return tokens.input + tokens.output + tokens.cacheCreation + tokens.cacheRead + tokens.reasoning;
+  // reasoning is informational only — already inside output for Codex; never
+  // added to the total (mirrors server-side totalTokenCount / ccusage).
+  return tokens.input + tokens.output + tokens.cacheCreation + tokens.cacheRead;
 }
 function unique(values) {
   return [...new Set(values)];
@@ -1042,7 +1044,7 @@ function escapeHtml(s) {
 
 function totalProjectTokens(p) {
   const t = p.tokens || {};
-  return (t.input || 0) + (t.output || 0) + (t.cacheCreation || 0) + (t.cacheRead || 0) + (t.reasoning || 0);
+  return (t.input || 0) + (t.output || 0) + (t.cacheCreation || 0) + (t.cacheRead || 0);
 }
 
 function renderProjects(data) {
@@ -1120,7 +1122,7 @@ function renderProjects(data) {
         tbody.appendChild(emptyTr);
       } else {
         for (const s of perSource) {
-          const tot = (s.tokens.input||0) + (s.tokens.output||0) + (s.tokens.cacheCreation||0) + (s.tokens.cacheRead||0) + (s.tokens.reasoning||0);
+          const tot = (s.tokens.input||0) + (s.tokens.output||0) + (s.tokens.cacheCreation||0) + (s.tokens.cacheRead||0);
           const chip =
             '<span class="inline-block px-1.5 py-0.5 rounded text-xs" style="background:' +
             SOURCE_COLORS[s.source] + '22;color:' + SOURCE_COLORS[s.source] + '">' +
@@ -1562,7 +1564,8 @@ function buildTokensPopup(data) {
     { label: 'Cache Read', val: tok.cacheRead, color: '#34d399' },
     { label: 'Cache Write', val: tok.cacheCreation, color: '#a78bfa' }
   ];
-  if (tok.reasoning > 0) segs.push({ label: 'Reasoning', val: tok.reasoning, color: '#fbbf24' });
+  // reasoning is NOT a segment: for Codex it is already inside output, so the
+  // donut/total stay equal to ccusage. It is surfaced as an info row below.
   const dSegs = segs.map(s => ({ ...s, pct: s.val / total }));
 
   let html = '';
@@ -1583,6 +1586,10 @@ function buildTokensPopup(data) {
     + '</div>'
     + '</div>';
 
+  if (tok.reasoning > 0) {
+    html += pRow('Reasoning', fmt(tok.reasoning), 'already counted inside Output');
+  }
+
   const peakDay = data.daily.reduce((b, d) => sumTokens(d.tokens) > sumTokens(b.tokens) ? d : b, data.daily[0]);
   if (peakDay) {
     html += pSection('Peak Usage');
@@ -1686,7 +1693,7 @@ function buildCostPerDayPopup(data) {
 function buildTopModelPopup(data) {
   const modelsWithTok = data.byModel.map(m => ({
     m,
-    tok: m.tokens.input + m.tokens.output + m.tokens.cacheRead + m.tokens.cacheCreation + (m.tokens.reasoning || 0)
+    tok: m.tokens.input + m.tokens.output + m.tokens.cacheRead + m.tokens.cacheCreation
   }));
   modelsWithTok.sort((a, b) => b.tok - a.tok);
   const totalTok = Math.max(modelsWithTok.reduce((s, r) => s + r.tok, 0), 1);
@@ -1768,7 +1775,7 @@ function buildSourceChartPopup(data) {
   const sourceTok = data.bySource.map(s => ({
     source: s.source,
     tokens: s.tokens,
-    total: s.tokens.input + s.tokens.output + s.tokens.cacheRead + s.tokens.cacheCreation + (s.tokens.reasoning || 0)
+    total: s.tokens.input + s.tokens.output + s.tokens.cacheRead + s.tokens.cacheCreation
   }));
   const total = Math.max(sourceTok.reduce((a, b) => a + b.total, 0), 1);
   const dSegs = sourceTok.map(s => ({
diff --git a/src/loaders/codex.test.ts b/src/loaders/codex.test.ts
index 6f5f4d9..1b94235 100644
--- a/src/loaders/codex.test.ts
+++ b/src/loaders/codex.test.ts
@@ -4,6 +4,7 @@ import { mkdtempSync, rmSync, mkdirSync, writeFileSync, utimesSync } from 'node:
 import { tmpdir } from 'node:os';
 import path from 'node:path';
 import { loadCodexEvents, loadCodexRateLimits, normalizeUsage, subtractUsage, type RawUsage } from './codex.js';
+import { totalTokenCount } from '../types.js';
 
 function makeSession(dir: string, name: string, lines: string[], mtimeSec?: number): string {
 	const file = path.join(dir, name);
@@ -321,5 +322,16 @@ describe('loadCodexEvents', () => {
 			cacheRead: 100,
 			reasoning: 5,
 		});
+
+		// ccusage parity invariant (the documented Fix #1 residual): ccusage's
+		// Codex total is the reported `total_tokens` (data-loader.ts:91,
+		// `total_tokens > 0 ? total_tokens : input + output`). We reconstruct it
+		// from the 5-field breakdown instead. Because OpenAI defines
+		// total_tokens === input_tokens + output_tokens and cached ⊆ input, our
+		// totalTokenCount (input+output+cacheCreation+cacheRead, reasoning
+		// excluded) must equal the reported total_tokens for well-formed logs.
+		// Turn 1 reported total = 1100; turn 2 reported delta total = 1310-1100.
+		assert.equal(totalTokenCount(events[0].tokens), 1100);
+		assert.equal(totalTokenCount(events[1].tokens), 1310 - 1100);
 	});
 });
diff --git a/src/types.test.ts b/src/types.test.ts
new file mode 100644
index 0000000..5f61e3e
--- /dev/null
+++ b/src/types.test.ts
@@ -0,0 +1,45 @@
+import { test, describe } from 'node:test';
+import assert from 'node:assert/strict';
+import { totalTokenCount, addTokens, emptyTokens, type TokenCounts } from './types.js';
+
+describe('totalTokenCount — ccusage parity', () => {
+  test('sums input + output + cacheCreation + cacheRead', () => {
+    const t: TokenCounts = {
+      input: 1000, output: 500, cacheCreation: 2000, cacheRead: 300, reasoning: 0,
+    };
+    // Matches ccusage getTotalTokens (_token-utils.ts): 1000+500+2000+300.
+    assert.equal(totalTokenCount(t), 3800);
+  });
+
+  test('EXCLUDES reasoning — it is informational only (already inside output for Codex)', () => {
+    const withReasoning: TokenCounts = {
+      input: 100, output: 50, cacheCreation: 0, cacheRead: 0, reasoning: 9999,
+    };
+    // ccusage never adds reasoning into its total; for Codex it lives inside
+    // `output`. Adding it here would double-count. Total must ignore it.
+    assert.equal(totalTokenCount(withReasoning), 150);
+  });
+
+  test('Claude-shaped tokens (reasoning always 0) are unaffected by the change', () => {
+    const claude: TokenCounts = {
+      input: 1234, output: 567, cacheCreation: 89, cacheRead: 4321, reasoning: 0,
+    };
+    assert.equal(totalTokenCount(claude), 1234 + 567 + 89 + 4321);
+  });
+
+  test('addTokens still tracks reasoning so it stays available for display', () => {
+    const sum = addTokens(
+      { input: 1, output: 2, cacheCreation: 3, cacheRead: 4, reasoning: 5 },
+      { input: 1, output: 2, cacheCreation: 3, cacheRead: 4, reasoning: 5 },
+    );
+    assert.equal(sum.reasoning, 10);
+    // ...but the total of the aggregate still excludes it.
+    assert.equal(totalTokenCount(sum), 2 + 4 + 6 + 8);
+  });
+
+  test('emptyTokens carries the reasoning field', () => {
+    assert.deepEqual(emptyTokens(), {
+      input: 0, output: 0, cacheCreation: 0, cacheRead: 0, reasoning: 0,
+    });
+  });
+});
diff --git a/src/types.ts b/src/types.ts
index 85b343f..d1aa23b 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -161,8 +161,15 @@ export function addTokens(a: TokenCounts, b: TokenCounts): TokenCounts {
 	};
 }
 
+// `reasoning` is informational only and is deliberately NOT part of the total.
+// For Codex/OpenAI the reasoning tokens are already counted inside `output`
+// (OpenAI semantics), so adding them would double-count. ccusage never adds
+// reasoning into its total either — apps/codex/src/data-loader.ts uses the
+// reported `total_tokens` (=== input + output). Excluding it here leaves Claude
+// totals unchanged (reasoning is always 0 there) and makes the Codex total
+// match ccusage. The `reasoning` field stays on TokenCounts for display.
 export function totalTokenCount(t: TokenCounts): number {
-	return t.input + t.output + t.cacheCreation + t.cacheRead + t.reasoning;
+	return t.input + t.output + t.cacheCreation + t.cacheRead;
 }
 
 // Returns true iff `s` parses to a finite Date. Loaders use this to drop

From ec8618378091b253d5fd38e61f35a55ed185a62d Mon Sep 17 00:00:00 2001
From: matthiasschalk <schalk.matthias@gmail.com>
Date: Sun, 17 May 2026 06:48:53 +0200
Subject: [PATCH 2/6] fix(pricing): apply LiteLLM >200k tiered pricing like
 ccusage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

calculateCost used flat per-token rates only. ccusage applies LiteLLM's
*_above_200k_tokens long-context rates per token type, per event, at the
200k threshold (packages/internal/src/pricing.ts calculateTieredCost).
Claude Code turns routinely carry cache_read > 200k, so we materially
undercharged long sessions versus ccusage.

Port calculateTieredCost faithfully and route all four token types
through it. Models without the above_200k fields stay flat — identical
to ccusage (it does not implement Gemini's 128k tier either). Extend
ModelPricing + the Sonnet 4 FALLBACK entry. Adds boundary tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/pricing.test.ts | 47 ++++++++++++++++++++++++++++
 src/pricing.ts      | 76 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 114 insertions(+), 9 deletions(-)

diff --git a/src/pricing.test.ts b/src/pricing.test.ts
index d936886..56f518e 100644
--- a/src/pricing.test.ts
+++ b/src/pricing.test.ts
@@ -34,6 +34,19 @@ mock.method(globalThis, 'fetch', async () =>
         input_cost_per_token: 10e-6,
         output_cost_per_token: 30e-6,
       },
+      // Claude-shaped tiered model: tokens above 200k (per type, per event)
+      // are billed at the *_above_200k_tokens rate. Mirrors ccusage's
+      // calculateTieredCost (packages/internal/src/pricing.ts:284).
+      'tiered-model': {
+        input_cost_per_token: 3e-6,
+        output_cost_per_token: 15e-6,
+        cache_creation_input_token_cost: 3.75e-6,
+        cache_read_input_token_cost: 0.3e-6,
+        input_cost_per_token_above_200k_tokens: 6e-6,
+        output_cost_per_token_above_200k_tokens: 22.5e-6,
+        cache_creation_input_token_cost_above_200k_tokens: 7.5e-6,
+        cache_read_input_token_cost_above_200k_tokens: 0.6e-6,
+      },
     }),
     { status: 200, headers: { 'Content-Type': 'application/json' } },
   ),
@@ -72,6 +85,40 @@ describe('calculateCost — cache pricing fallback (C11)', () => {
   });
 });
 
+describe('calculateCost — tiered >200k pricing (ccusage parity)', () => {
+  test('flat pricing below the 200k threshold', async () => {
+    const cost = await calculateCost('tiered-model', counts({
+      input: 100_000, output: 0, cacheCreation: 0, cacheRead: 0,
+    }));
+    // 100k * 3e-6 = 0.30 — no tier applied below threshold.
+    assert.equal(cost.toFixed(6), (0.3).toFixed(6));
+  });
+
+  test('input above 200k splits at the threshold (ccusage example)', async () => {
+    const cost = await calculateCost('tiered-model', counts({
+      input: 300_000, output: 0, cacheCreation: 0, cacheRead: 0,
+    }));
+    // ccusage docs example: 200k*3e-6 + 100k*6e-6 = 0.6 + 0.6 = 1.2
+    assert.equal(cost.toFixed(6), (1.2).toFixed(6));
+  });
+
+  test('each token type is tiered independently, per event', async () => {
+    const cost = await calculateCost('tiered-model', counts({
+      input: 0, output: 0, cacheCreation: 0, cacheRead: 250_000,
+    }));
+    // cacheRead: 200k*0.3e-6 + 50k*0.6e-6 = 0.06 + 0.03 = 0.09
+    assert.equal(cost.toFixed(6), (0.09).toFixed(6));
+  });
+
+  test('models WITHOUT above_200k fields stay flat even past 200k (== ccusage)', async () => {
+    const cost = await calculateCost('gpt-4o', counts({
+      input: 500_000, output: 0, cacheCreation: 0, cacheRead: 0,
+    }));
+    // No tiered field → flat: 500k * 5e-6 = 2.5 (ccusage falls back to base too).
+    assert.equal(cost.toFixed(6), (2.5).toFixed(6));
+  });
+});
+
 describe('findModelPricing — prefix lookups (C12)', () => {
   test('finds Gemini models via the gemini/ prefix', async () => {
     // Loader emits the bare model name; LiteLLM keys it with the prefix.
diff --git a/src/pricing.ts b/src/pricing.ts
index f82d595..d3bc1cc 100644
--- a/src/pricing.ts
+++ b/src/pricing.ts
@@ -8,6 +8,16 @@ type ModelPricing = {
 	output_cost_per_token?: number;
 	cache_creation_input_token_cost?: number;
 	cache_read_input_token_cost?: number;
+	// Long-context tiered rates: tokens of a given type ABOVE 200k (per event)
+	// are priced at these higher rates. LiteLLM publishes them for Claude/
+	// Anthropic models; ccusage applies exactly the 200k threshold per token
+	// type per entry (packages/internal/src/pricing.ts calculateTieredCost).
+	// Models without these fields fall back to flat pricing — identical to
+	// ccusage, which does the same (it does NOT implement Gemini's 128k tier).
+	input_cost_per_token_above_200k_tokens?: number;
+	output_cost_per_token_above_200k_tokens?: number;
+	cache_creation_input_token_cost_above_200k_tokens?: number;
+	cache_read_input_token_cost_above_200k_tokens?: number;
 };
 
 const FALLBACK_PRICES: Record<string, ModelPricing> = {
@@ -16,6 +26,12 @@ const FALLBACK_PRICES: Record<string, ModelPricing> = {
 		output_cost_per_token: 15e-6,
 		cache_read_input_token_cost: 0.3e-6,
 		cache_creation_input_token_cost: 3.75e-6,
+		// Anthropic >200k long-context rates (offline fallback only; live
+		// LiteLLM carries these verbatim and they're used the same way).
+		input_cost_per_token_above_200k_tokens: 6e-6,
+		output_cost_per_token_above_200k_tokens: 22.5e-6,
+		cache_read_input_token_cost_above_200k_tokens: 0.6e-6,
+		cache_creation_input_token_cost_above_200k_tokens: 7.5e-6,
 	},
 	'claude-opus-4-20250514': {
 		input_cost_per_token: 15e-6,
@@ -106,22 +122,64 @@ function findModelPricing(
 	return null;
 }
 
+// Faithful port of ccusage's tiered-cost helper
+// (packages/internal/src/pricing.ts:284). Tokens of a single token type, for a
+// single event, above `threshold` are billed at `tieredPrice`; the rest at
+// `basePrice`. When `tieredPrice` is absent the model is flat-priced — exactly
+// ccusage's behaviour, so non-Claude models (and Claude pre-tier) match.
+// The threshold is applied PER EVENT, before any daily/monthly aggregation,
+// because enrichCosts calls this once per UnifiedTokenEvent.
+function calculateTieredCost(
+	totalTokens: number | undefined,
+	basePrice: number | undefined,
+	tieredPrice: number | undefined,
+	threshold = 200_000,
+): number {
+	if (totalTokens == null || totalTokens <= 0) return 0;
+
+	if (totalTokens > threshold && tieredPrice != null) {
+		const tokensBelowThreshold = Math.min(totalTokens, threshold);
+		const tokensAboveThreshold = Math.max(0, totalTokens - threshold);
+
+		let tieredCost = tokensAboveThreshold * tieredPrice;
+		if (basePrice != null) tieredCost += tokensBelowThreshold * basePrice;
+		return tieredCost;
+	}
+
+	if (basePrice != null) return totalTokens * basePrice;
+	return 0;
+}
+
 export async function calculateCost(model: string, tokens: TokenCounts): Promise<number> {
 	const prices = await fetchPricing();
 	const pricing = findModelPricing(prices, model);
 	if (!pricing) return 0;
 
-	const inputCost = tokens.input * (pricing.input_cost_per_token ?? 0);
-	const outputCost = tokens.output * (pricing.output_cost_per_token ?? 0);
+	const inputCost = calculateTieredCost(
+		tokens.input,
+		pricing.input_cost_per_token,
+		pricing.input_cost_per_token_above_200k_tokens,
+	);
+	const outputCost = calculateTieredCost(
+		tokens.output,
+		pricing.output_cost_per_token,
+		pricing.output_cost_per_token_above_200k_tokens,
+	);
 	// Cache pricing is provider-specific (Anthropic charges 1.25× input for
 	// writes / 0.1× for reads; OpenAI ~0.5× for reads). When LiteLLM doesn't
-	// publish explicit cache rates for a model, fall back to 0 — falling back
-	// to the input rate would inflate cache-read cost up to 10× (Anthropic)
-	// and silently misprice every Claude Code session.
-	const cacheCreateCost =
-		tokens.cacheCreation * (pricing.cache_creation_input_token_cost ?? 0);
-	const cacheReadCost =
-		tokens.cacheRead * (pricing.cache_read_input_token_cost ?? 0);
+	// publish explicit cache rates for a model, calculateTieredCost returns 0 —
+	// falling back to the input rate would inflate cache-read cost up to 10×
+	// (Anthropic) and silently misprice every Claude Code session.
+	const cacheCreateCost = calculateTieredCost(
+		tokens.cacheCreation,
+		pricing.cache_creation_input_token_cost,
+		pricing.cache_creation_input_token_cost_above_200k_tokens,
+	);
+	const cacheReadCost = calculateTieredCost(
+		tokens.cacheRead,
+		pricing.cache_read_input_token_cost,
+		pricing.cache_read_input_token_cost_above_200k_tokens,
+	);
 
 	return inputCost + outputCost + cacheCreateCost + cacheReadCost;
 }

From dadd1b49b7b4aa3679e5f5c6263a0165c030a6d1 Mon Sep 17 00:00:00 2001
From: matthiasschalk <schalk.matthias@gmail.com>
Date: Sun, 17 May 2026 06:51:07 +0200
Subject: [PATCH 3/6] fix(loaders/claude,store): match ccusage usage schema;
 finite-number guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parseLine coerced via Number(x ?? 0): a non-numeric input_tokens became
NaN (?? only catches null/undefined), the `input === 0 && output === 0`
guard let it through (NaN === 0 is false), and addTokens propagated it —
one bad line turned the whole dashboard total into NaN. ccusage's
valibot usageDataSchema instead requires input_tokens/
output_tokens to be real numbers (entry dropped otherwise) and treats the
cache fields as v.optional(v.number()) (absent -> 0, present -> must be a
number).

Add requiredTokenNumber/optionalTokenNumber mirroring that contract (no
string coercion; present null/non-number rejects the entry; non-finite
rejected as intentional hardening). Harden store.ts isTokenCounts to
require finite numbers and use it in loadFile so a poisoned historical
line (NaN -> null on disk) is dropped instead of re-poisoning the store.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/loaders/claude.test.ts | 126 +++++++++++++++++++++++++++++++++++++
 src/loaders/claude.ts      |  39 ++++++++++--
 src/store.test.ts          |  19 ++++++
 src/store.ts               |  20 ++++--
 4 files changed, 194 insertions(+), 10 deletions(-)
 create mode 100644 src/loaders/claude.test.ts

diff --git a/src/loaders/claude.test.ts b/src/loaders/claude.test.ts
new file mode 100644
index 0000000..b24e24f
--- /dev/null
+++ b/src/loaders/claude.test.ts
@@ -0,0 +1,126 @@
+import { test, describe, before, after, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+import { mkdtempSync, rmSync, mkdirSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+import { loadClaudeEvents } from './claude.js';
+
+// loadClaudeEvents reads CLAUDE_CONFIG_DIR/projects/**/*.jsonl. We isolate the
+// loader cache to a throwaway dir and disable it so each case is deterministic.
+let tmpRoot: string;
+let projDir: string;
+const ORIG_CFG = process.env.CLAUDE_CONFIG_DIR;
+const ORIG_DATA = process.env.TOKENBBQ_DATA_DIR;
+const ORIG_NOCACHE = process.env.TOKENBBQ_DISABLE_LOADER_CACHE;
+
+function writeSession(name: string, objs: unknown[]): void {
+  writeFileSync(
+    path.join(projDir, name),
+    objs.map((o) => JSON.stringify(o)).join('\n') + '\n',
+    'utf-8',
+  );
+}
+
+const line = (over: Record<string, unknown> = {}): Record<string, unknown> => ({
+  timestamp: '2026-05-01T10:00:00.000Z',
+  sessionId: 's1',
+  requestId: 'req-' + Math.random().toString(36).slice(2),
+  message: {
+    id: 'msg-' + Math.random().toString(36).slice(2),
+    model: 'claude-sonnet-4-20250514',
+    usage: { input_tokens: 100, output_tokens: 50 },
+  },
+  ...over,
+});
+
+describe('loadClaudeEvents — ccusage usageDataSchema parity', () => {
+  before(() => {
+    tmpRoot = mkdtempSync(path.join(tmpdir(), 'claude-test-'));
+    projDir = path.join(tmpRoot, 'projects', 'proj');
+    mkdirSync(projDir, { recursive: true });
+    process.env.CLAUDE_CONFIG_DIR = tmpRoot;
+    process.env.TOKENBBQ_DATA_DIR = path.join(tmpRoot, '.data');
+    process.env.TOKENBBQ_DISABLE_LOADER_CACHE = '1';
+  });
+
+  after(() => {
+    if (ORIG_CFG === undefined) delete process.env.CLAUDE_CONFIG_DIR;
+    else process.env.CLAUDE_CONFIG_DIR = ORIG_CFG;
+    if (ORIG_DATA === undefined) delete process.env.TOKENBBQ_DATA_DIR;
+    else process.env.TOKENBBQ_DATA_DIR = ORIG_DATA;
+    if (ORIG_NOCACHE === undefined) delete process.env.TOKENBBQ_DISABLE_LOADER_CACHE;
+    else process.env.TOKENBBQ_DISABLE_LOADER_CACHE = ORIG_NOCACHE;
+    rmSync(tmpRoot, { recursive: true, force: true });
+  });
+
+  beforeEach(() => {
+    rmSync(projDir, { recursive: true, force: true });
+    mkdirSync(projDir, { recursive: true });
+  });
+
+  test('counts a well-formed entry and defaults absent cache fields to 0', async () => {
+    writeSession('a.jsonl', [line({
+      message: { id: 'm1', model: 'claude-sonnet-4-20250514', usage: { input_tokens: 120, output_tokens: 30 } },
+      requestId: 'r1',
+    })]);
+    const events = await loadClaudeEvents();
+    assert.equal(events.length, 1);
+    assert.deepEqual(events[0].tokens, {
+      input: 120, output: 30, cacheCreation: 0, cacheRead: 0, reasoning: 0,
+    });
+  });
+
+  test('keeps cache token fields when present', async () => {
+    writeSession('b.jsonl', [line({
+      message: {
+        id: 'm2', model: 'claude-sonnet-4-20250514',
+        usage: { input_tokens: 10, output_tokens: 5, cache_creation_input_tokens: 7, cache_read_input_tokens: 900 },
+      },
+      requestId: 'r2',
+    })]);
+    const events = await loadClaudeEvents();
+    assert.equal(events.length, 1);
+    assert.equal(events[0].tokens.cacheCreation, 7);
+    assert.equal(events[0].tokens.cacheRead, 900);
+  });
+
+  test('drops the entry when input_tokens is a string (== ccusage v.number())', async () => {
+    writeSession('c.jsonl', [line({
+      message: { id: 'm3', model: 'x', usage: { input_tokens: '100', output_tokens: 50 } },
+      requestId: 'r3',
+    })]);
+    assert.equal((await loadClaudeEvents()).length, 0);
+  });
+
+  test('drops the entry when input_tokens is missing (required field)', async () => {
+    writeSession('d.jsonl', [line({
+      message: { id: 'm4', model: 'x', usage: { output_tokens: 50 } },
+      requestId: 'r4',
+    })]);
+    assert.equal((await loadClaudeEvents()).length, 0);
+  });
+
+  test('drops the entry when a present cache field is not a number', async () => {
+    writeSession('e.jsonl', [line({
+      message: {
+        id: 'm5', model: 'x',
+        usage: { input_tokens: 10, output_tokens: 5, cache_read_input_tokens: 'lots' },
+      },
+      requestId: 'r5',
+    })]);
+    assert.equal((await loadClaudeEvents()).length, 0);
+  });
+
+  test('drops the entry when a cache field is present but null (valibot v.optional rejects null)', async () => {
+    // valibot v.optional(v.number()) only excuses an ABSENT key; a present
+    // JSON null is not a number → ccusage drops the whole entry.
+    writeSession('e2.jsonl', [line({
+      message: {
+        id: 'm5b', model: 'x',
+        usage: { input_tokens: 10, output_tokens: 5, cache_creation_input_tokens: null },
+      },
+      requestId: 'r5b',
+    })]);
+    assert.equal((await loadClaudeEvents()).length, 0);
+  });
+});
diff --git a/src/loaders/claude.ts b/src/loaders/claude.ts
index aa0404a..9d9ac31 100644
--- a/src/loaders/claude.ts
+++ b/src/loaders/claude.ts
@@ -29,6 +29,21 @@ function getClaudePaths(): string[] {
 	return candidates.filter((p) => existsSync(path.join(p, 'projects')));
 }
 
+// valibot `v.number()` (required): present and a real number, else the entry
+// is rejected. Returns null to signal "reject the whole event".
+function requiredTokenNumber(x: unknown): number | null {
+	return typeof x === 'number' && Number.isFinite(x) ? x : null;
+}
+
+// valibot `v.optional(v.number())`: only an ABSENT key (JS `undefined`) is
+// allowed to be missing → default 0. A PRESENT value must be a real number;
+// `null` (JSON null), strings, etc. are not numbers, so valibot fails the
+// parse and ccusage drops the whole entry. Returning null signals "reject".
+function optionalTokenNumber(x: unknown): number | null {
+	if (x === undefined) return 0;
+	return typeof x === 'number' && Number.isFinite(x) ? x : null;
+}
+
 function parseLine(raw: Record<string, unknown>): UnifiedTokenEvent | null {
 	if (!isValidTimestamp(raw.timestamp)) return null;
 
@@ -39,8 +54,24 @@ function parseLine(raw: Record<string, unknown>): UnifiedTokenEvent | null {
 	if (!usage) return null;
 
 	const model = String(message.model ?? 'unknown');
-	const input = Number(usage.input_tokens ?? 0);
-	const output = Number(usage.output_tokens ?? 0);
+
+	// Mirror ccusage's usageDataSchema (apps/ccusage/src/data-loader.ts:167):
+	// message.usage.input_tokens / output_tokens are required `v.number()`,
+	// the two cache fields are `v.optional(v.number())`. A required field that
+	// is absent or not a number makes ccusage drop the whole entry; an optional
+	// field absent defaults to 0 but, if present, must be a number. We coerce
+	// nothing (string "100" is rejected, just like valibot) and additionally
+	// reject non-finite numbers (Infinity from `1e999`) — intentional hardening
+	// over bare v.number(); genuine usage logs never carry such values.
+	const input = requiredTokenNumber(usage.input_tokens);
+	if (input === null) return null;
+	const output = requiredTokenNumber(usage.output_tokens);
+	if (output === null) return null;
+	const cacheCreation = optionalTokenNumber(usage.cache_creation_input_tokens);
+	if (cacheCreation === null) return null;
+	const cacheRead = optionalTokenNumber(usage.cache_read_input_tokens);
+	if (cacheRead === null) return null;
+
 	if (input === 0 && output === 0) return null;
 
 	return {
@@ -51,8 +82,8 @@ function parseLine(raw: Record<string, unknown>): UnifiedTokenEvent | null {
 		tokens: {
 			input,
 			output,
-			cacheCreation: Number(usage.cache_creation_input_tokens ?? 0),
-			cacheRead: Number(usage.cache_read_input_tokens ?? 0),
+			cacheCreation,
+			cacheRead,
 			reasoning: 0,
 		},
 		costUSD: typeof raw.costUSD === 'number' ? raw.costUSD : 0,
diff --git a/src/store.test.ts b/src/store.test.ts
index 1bff687..d6f0997 100644
--- a/src/store.test.ts
+++ b/src/store.test.ts
@@ -101,6 +101,25 @@ describe('loadStore', () => {
     assert.equal(state.events.length, 1);
   });
 
+  test('rejects a NaN-poisoned token line (serializes to null on disk)', () => {
+    const good = ev({ sessionId: 'ok' });
+    const goodLine = JSON.stringify({ v: 1, ...good, eventHash: hashEvent(good) }) + '\n';
+    // A pre-fix loader could let NaN into tokens.input; JSON.stringify turns
+    // NaN into null, so the persisted line carries `"input": null`. loadStore
+    // must drop it instead of summing null/NaN into every aggregate.
+    const poisoned = ev({ sessionId: 'bad' });
+    const poisonedLine = JSON.stringify({
+      v: 1, ...poisoned,
+      tokens: { ...poisoned.tokens, input: null },
+      eventHash: 'x',
+    }) + '\n';
+    appendFileSync(legacyPath(), goodLine + poisonedLine);
+
+    const state = loadStore();
+    assert.equal(state.events.length, 1);
+    assert.equal(state.events[0].sessionId, 'ok');
+  });
+
   test('ignores a poisoned cache written by the pre-fix version', () => {
     const eventsDir = path.join(tmp, 'events');
     mkdirSync(eventsDir, { recursive: true });
diff --git a/src/store.ts b/src/store.ts
index e7b655b..3932a08 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -133,15 +133,23 @@ function sameFileSet(a: StoreFileMeta[], b: StoreFileMeta[]): boolean {
   return true;
 }
 
+function isFiniteNumber(x: unknown): x is number {
+  return typeof x === 'number' && Number.isFinite(x);
+}
+
 function isTokenCounts(v: unknown): v is UnifiedTokenEvent['tokens'] {
   if (!v || typeof v !== 'object') return false;
   const t = v as Record<string, unknown>;
+  // Require *finite* numbers, not just `typeof number`: NaN/Infinity are
+  // typeof 'number' and would poison every aggregate that sums tokens. JSON
+  // can't carry NaN (a poisoned value serializes to null), so this also
+  // rejects historical lines written before the loader-side finite guard.
   return (
-    typeof t.input === 'number' &&
-    typeof t.output === 'number' &&
-    typeof t.cacheCreation === 'number' &&
-    typeof t.cacheRead === 'number' &&
-    typeof t.reasoning === 'number'
+    isFiniteNumber(t.input) &&
+    isFiniteNumber(t.output) &&
+    isFiniteNumber(t.cacheCreation) &&
+    isFiniteNumber(t.cacheRead) &&
+    isFiniteNumber(t.reasoning)
   );
 }
 
@@ -236,7 +244,7 @@ function loadFile(file: string, into: LoadOutcome): void {
       typeof parsed.timestamp !== 'string' ||
       typeof parsed.sessionId !== 'string' ||
       typeof parsed.model !== 'string' ||
-      !parsed.tokens || typeof parsed.tokens !== 'object'
+      !isTokenCounts(parsed.tokens)
     ) {
       into.badSeen++;
       continue;

From 3856dc23fd3ac4581510f9237098209d4c60989c Mon Sep 17 00:00:00 2001
From: matthiasschalk <schalk.matthias@gmail.com>
Date: Sun, 17 May 2026 06:51:42 +0200
Subject: [PATCH 4/6] fix(loaders/claude): keep zero-token & cache-only entries
 (ccusage parity)

parseLine dropped any entry with input_tokens === 0 && output_tokens ===
0, regardless of its cache fields. ccusage's schema accepts 0/0 and
calculateTotals still sums cache_creation/cache_read, so a cache-only
turn (input=0, output=0, cache_read>0) is real usage. Dropping it
undercounted tokens and cost versus ccusage. Remove the guard.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/loaders/claude.test.ts | 23 +++++++++++++++++++++++
 src/loaders/claude.ts      |  5 ++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/loaders/claude.test.ts b/src/loaders/claude.test.ts
index b24e24f..6454027 100644
--- a/src/loaders/claude.test.ts
+++ b/src/loaders/claude.test.ts
@@ -123,4 +123,27 @@ describe('loadClaudeEvents — ccusage usageDataSchema parity', () => {
     })]);
     assert.equal((await loadClaudeEvents()).length, 0);
   });
+
+  test('keeps a cache-only entry (input=0, output=0, cache_read>0) — ccusage parity', async () => {
+    writeSession('f.jsonl', [line({
+      message: {
+        id: 'm6', model: 'claude-sonnet-4-20250514',
+        usage: { input_tokens: 0, output_tokens: 0, cache_read_input_tokens: 1500 },
+      },
+      requestId: 'r6',
+    })]);
+    const events = await loadClaudeEvents();
+    assert.equal(events.length, 1);
+    assert.deepEqual(events[0].tokens, {
+      input: 0, output: 0, cacheCreation: 0, cacheRead: 1500, reasoning: 0,
+    });
+  });
+
+  test('keeps a pure zero-token entry (ccusage schema accepts 0/0)', async () => {
+    writeSession('g.jsonl', [line({
+      message: { id: 'm7', model: 'x', usage: { input_tokens: 0, output_tokens: 0 } },
+      requestId: 'r7',
+    })]);
+    assert.equal((await loadClaudeEvents()).length, 1);
+  });
 });
diff --git a/src/loaders/claude.ts b/src/loaders/claude.ts
index 9d9ac31..d133259 100644
--- a/src/loaders/claude.ts
+++ b/src/loaders/claude.ts
@@ -72,7 +72,10 @@ function parseLine(raw: Record<string, unknown>): UnifiedTokenEvent | null {
 	const cacheRead = optionalTokenNumber(usage.cache_read_input_tokens);
 	if (cacheRead === null) return null;
 
-	if (input === 0 && output === 0) return null;
+	// No zero-token drop: ccusage's schema accepts input_tokens/output_tokens
+	// of 0 and still sums cache_creation/cache_read (calculateTotals). A cache-
+	// only turn (input=0, output=0, cache_read>0) is real usage; dropping it
+	// here undercounted tokens and cost versus ccusage.
 
 	return {
 		source: 'claude-code',

From cc7114d7113d25b6d2736c35e65eb8ab072d57ff Mon Sep 17 00:00:00 2001
From: matthiasschalk <schalk.matthias@gmail.com>
Date: Sun, 17 May 2026 06:52:52 +0200
Subject: [PATCH 5/6] fix(loaders/claude): never dedupe entries missing
 message/request id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dedupe fallback key `timestamp:model:input:output` (used when
messageId or requestId was absent) could collapse genuinely distinct
ID-less turns — and it ignored cache tokens entirely — making totals
lower than ccusage. ccusage's createUniqueHash returns null when either
id is missing and isDuplicateEntry(null) === false: ID-less entries are
never deduped.

Make dedupeKey `string | null`, drop the synthetic fallback, and skip
both the seen-check and seen-insert for null keys (== ccusage
markAsProcessed(null) noop). Bump the loader CACHE_VERSION to 2 so v1
records carrying stale synthetic keys are reparsed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/loaders/cache.ts       |  6 +++++-
 src/loaders/claude.test.ts | 29 +++++++++++++++++++++++++++++
 src/loaders/claude.ts      | 18 +++++++++++++++---
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/src/loaders/cache.ts b/src/loaders/cache.ts
index e063fd9..8b0bb18 100644
--- a/src/loaders/cache.ts
+++ b/src/loaders/cache.ts
@@ -3,7 +3,11 @@ import path from 'node:path';
 import { getStoreDir } from '../store.js';
 import type { Source, UnifiedTokenEvent } from '../types.js';
 
-const CACHE_VERSION = 1;
+// v2: the Claude loader's dedupeKey is now `string | null` (null for entries
+// missing messageId/requestId, never deduped — ccusage parity). v1 records
+// carry the old synthetic `ts:model:in:out` fallback string; bumping forces a
+// one-time reparse so stale synthetic keys can't suppress ID-less events.
+const CACHE_VERSION = 2;
 
 interface FileCacheEntry<T> {
 	mtimeMs: number;
diff --git a/src/loaders/claude.test.ts b/src/loaders/claude.test.ts
index 6454027..7a81a41 100644
--- a/src/loaders/claude.test.ts
+++ b/src/loaders/claude.test.ts
@@ -146,4 +146,33 @@ describe('loadClaudeEvents — ccusage usageDataSchema parity', () => {
     })]);
     assert.equal((await loadClaudeEvents()).length, 1);
   });
+
+  test('dedupes entries that share messageId:requestId (== ccusage)', async () => {
+    const dup = {
+      timestamp: '2026-05-01T10:00:00.000Z', sessionId: 's1', requestId: 'same-req',
+      message: { id: 'same-msg', model: 'claude-sonnet-4-20250514', usage: { input_tokens: 100, output_tokens: 50 } },
+    };
+    writeSession('h.jsonl', [dup, { ...dup }]);
+    assert.equal((await loadClaudeEvents()).length, 1);
+  });
+
+  test('NEVER dedupes ID-less entries — even byte-identical ones (ccusage isDuplicateEntry(null)=false)', async () => {
+    // Same timestamp/model/input/output and NO requestId: the old synthetic
+    // fallback key collapsed these into 1, undercounting vs ccusage.
+    const idless = {
+      timestamp: '2026-05-01T10:00:00.000Z', sessionId: 's1',
+      message: { model: 'claude-sonnet-4-20250514', usage: { input_tokens: 100, output_tokens: 50 } },
+    };
+    writeSession('i.jsonl', [idless, { ...idless }, { ...idless }]);
+    assert.equal((await loadClaudeEvents()).length, 3);
+  });
+
+  test('missing only requestId → not deduped (either id absent ⇒ null key)', async () => {
+    const noReq = {
+      timestamp: '2026-05-01T10:00:00.000Z', sessionId: 's1',
+      message: { id: 'msg-x', model: 'claude-sonnet-4-20250514', usage: { input_tokens: 100, output_tokens: 50 } },
+    };
+    writeSession('j.jsonl', [noReq, { ...noReq }]);
+    assert.equal((await loadClaudeEvents()).length, 2);
+  });
 });
diff --git a/src/loaders/claude.ts b/src/loaders/claude.ts
index d133259..82e8eb6 100644
--- a/src/loaders/claude.ts
+++ b/src/loaders/claude.ts
@@ -93,8 +93,11 @@ function parseLine(raw: Record<string, unknown>): UnifiedTokenEvent | null {
 	};
 }
 
+// dedupeKey is null when the upstream entry lacks a messageId or requestId.
+// ccusage's createUniqueHash returns null in that case and isDuplicateEntry
+// (null) === false — i.e. ID-less entries are NEVER treated as duplicates.
 type CachedClaudeEvent = {
-	dedupeKey: string;
+	dedupeKey: string | null;
 	event: UnifiedTokenEvent;
 };
 
@@ -103,7 +106,7 @@ function isCachedClaudeEvent(value: unknown): value is CachedClaudeEvent {
 	const record = value as Record<string, unknown>;
 	const event = record.event as Record<string, unknown> | undefined;
 	return (
-		typeof record.dedupeKey === 'string' &&
+		(typeof record.dedupeKey === 'string' || record.dedupeKey === null) &&
 		!!event &&
 		typeof event.source === 'string' &&
 		typeof event.timestamp === 'string' &&
@@ -166,9 +169,14 @@ export async function loadClaudeEvents(): Promise<UnifiedTokenEvent[]> {
 
 			const requestId = String(parsed.requestId ?? '');
 			const messageId = String((parsed.message as Record<string, unknown>)?.id ?? '');
+			// Match ccusage exactly: a stable key ONLY when both ids exist;
+			// otherwise null → never deduplicated. The previous synthetic
+			// `timestamp:model:input:output` fallback could collapse genuinely
+			// distinct ID-less events (it also ignored cache tokens), making
+			// totals lower than ccusage.
 			const dedupeKey = requestId && messageId
 				? `${messageId}:${requestId}`
-				: `${event.timestamp}:${event.model}:${event.tokens.input}:${event.tokens.output}`;
+				: null;
 
 			fileEvents.push({ dedupeKey, event });
 		}
@@ -177,6 +185,10 @@ export async function loadClaudeEvents(): Promise<UnifiedTokenEvent[]> {
 
 	const seen = new Set<string>();
 	const events = records.flatMap((record) => {
+		// null key (missing messageId/requestId) is never a duplicate and is
+		// never recorded — mirrors ccusage isDuplicateEntry(null)===false +
+		// markAsProcessed(null)=noop. Only id-bearing entries are deduped.
+		if (record.dedupeKey === null) return [record.event];
 		if (seen.has(record.dedupeKey)) return [];
 		seen.add(record.dedupeKey);
 		return [record.event];

From 49a5c0573b75469c4964493178481a8c50a73d79 Mon Sep 17 00:00:00 2001
From: matthiasschalk <schalk.matthias@gmail.com>
Date: Sun, 17 May 2026 06:53:46 +0200
Subject: [PATCH 6/6] docs(store): document store-vs-ccusage divergence; guard
 distinct turns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ccusage is stateless; TokenBBQ persists an append-only store with a
content-hash dedup that differs from ccusage's messageId:requestId. Per
product decision (harden + document, no risky store-hash migration, no
new mode): document the parity invariant on hashEvent — the loader is the
dedup authority and matches ccusage; the content hash is only for
multi-process safety and is injective for realistically-distinct Claude
turns. Add a regression test proving two distinct turns with identical
token counts both survive, and record the accepted residuals (post-prune
TokenBBQ >= ccusage is intended) in CCUSAGE_PARITY_REVIEW.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CCUSAGE_PARITY_REVIEW.md | 147 +++++++++++++++++++++++++++++++++++++++
 src/store.test.ts        |  27 +++++++
 src/store.ts             |  15 ++++
 3 files changed, 189 insertions(+)
 create mode 100644 CCUSAGE_PARITY_REVIEW.md

diff --git a/CCUSAGE_PARITY_REVIEW.md b/CCUSAGE_PARITY_REVIEW.md
new file mode 100644
index 0000000..59f8eeb
--- /dev/null
+++ b/CCUSAGE_PARITY_REVIEW.md
@@ -0,0 +1,147 @@
+# 🔥 Code-Review: Token-Berechnung TokenBBQ vs. ccusage
+
+> **Datum:** 2026-05-17
+> **Scope:** Token- & Kosten-Berechnung im Dashboard (`src/loaders/`, `src/aggregator.ts`, `src/pricing.ts`, `src/store.ts`)
+> **Referenz:** ccusage v18.0.8 (vendored unter `ccusage/`)
+> **Methode:** Unabhängige Code-Analyse + Cross-Check via Codex; alle Codex-Behauptungen am Code gegengeprüft.
+> **Ziel:** Unsere Zahlen sollen mit ccusage übereinstimmen.
+
+## TL;DR — Stimmen unsere Zahlen mit ccusage überein?
+
+**Claude Code (ccusage-Kerndomäne):** Token-Summen stimmen *fast* — Formel, Dedup und Datums-Bucketing sind identisch. Aber die **Kosten** weichen systematisch nach unten ab (fehlende >200k-Staffelpreise). **Codex:** Die Dashboard-Token sind **systematisch zu hoch** (Reasoning-Doppelzählung) — das ist die sichtbarste Abweichung beim Vergleich pro Tool.
+
+| # | Schwere | Bereich | Effekt |
+|---|---------|---------|--------|
+| 1 | 🔴 Kritisch | Codex-Tokens | Total systematisch **zu hoch** |
+| 2 | 🔴 Kritisch | Claude-Kosten | Kosten **zu niedrig** bei großen Kontexten |
+| 3 | 🟠 Wichtig | Robustheit | Ein kaputtes Feld kann Total auf `NaN` ziehen |
+| 4 | 🟡 Mittel | Claude-Tokens | Cache-only-Events fallen raus (Unterzählung) |
+| 5 | 🟡 Mittel | Claude-Tokens | Synthetischer Dedup-Key kollabiert echte Events |
+| 6 | 🟢 Architektur | Alle | Persistenter Store driftet vs. stateless ccusage |
+
+---
+
+## 🔴 1. Codex: Reasoning-Tokens werden doppelt gezählt
+
+**Ort:** `src/loaders/codex.ts:166` (`reasoning: raw.reasoning`) + `src/types.ts:165` (`totalTokenCount`) + `src/dashboard.ts:413`
+
+**Divergenz:** OpenAI/Codex liefert `output_tokens` **inklusive** der Reasoning-Tokens; `reasoning_output_tokens` ist nur eine informative Teilmenge. ccusage implementiert das explizit:
+
+- `ccusage/apps/codex/src/data-loader.ts:60-62`: *„includes them as a separate field but does not add them to total_tokens"* → `total = input + output`
+- `ccusage/apps/codex/src/token-utils.ts:39`: *„Reasoning tokens are already included in output_tokens, so they are not added separately to avoid double-counting"* — die Kostenformel hat **keinen** Reasoning-Term.
+
+TokenBBQ speichert `reasoning` separat **und** `totalTokenCount` summiert `input+output+cacheCreation+cacheRead+reasoning`. Da `output` die Reasoning-Tokens bereits enthält, zählt jedes Codex-Event seine Reasoning-Tokens **zweimal**. Bei gpt-5/o-Modellen (Codex' Standard) oft 50–90 % der Output-Tokens → massive Überzählung in Dashboard-Total, Heatmap, `topModel` und jeder Tokens-Chart.
+
+**Fix:** Für Codex `output` als Brutto behalten und `reasoning` nur als Anzeige-Metainfo führen (nicht im Total). ccusage' Weg: Codex' eigenes `total_tokens` (in `normalizeUsage` bereits berechnet, aber nie verwendet) als Wahrheit nehmen statt selbst zu rekonstruieren.
+
+---
+
+## 🔴 2. Claude: Fehlende Staffelpreise >200k Tokens
+
+**Ort:** `src/pricing.ts:109-127` (`calculateCost`)
+
+**Divergenz:** ccusage rechnet Claude/Anthropic-Modelle mit **Tiered Pricing** ab — Tokens über 200k pro Token-Typ pro Event zur höheren Rate (`ccusage/packages/internal/src/pricing.ts:284-336`, `calculateTieredCost`). Für Sonnet 4: cache_read 200k+ zu $0,60/M statt $0,30/M (2×), Input 2×, Output 1,5×.
+
+`pricing.ts` nutzt ausschließlich Flat-Raten. In Claude-Code-Sessions ist `cache_read` pro Turn praktisch der gesamte bisherige Kontext — regelmäßig **>200k**. Ergebnis: TokenBBQ **unterberechnet** die Kosten von Heavy-User-Sessions deutlich. Wahrscheinlich der größte moderne Kosten-Mismatch.
+
+**Fix:** `ModelPricing`-Typ um `*_above_200k_tokens`-Felder erweitern und `calculateTieredCost`-Logik (200k-Schwelle, pro Token-Typ) nachbauen. Schwelle nur für Claude/Anthropic (GPT = flat, Gemini = 128k — Letzteres setzt auch ccusage nicht um).
+
+**Nebenpunkt:** Bei `auto` nutzt ccusage `data.costUSD`, sobald der Wert `!= null` ist. TokenBBQ rechnet neu, sobald `costUSD <= 0`. Bei modernen Logs ohne `costUSD` rechnen beide → Punkt 2 dominiert. Bei Logs *mit* `costUSD: 0` weicht TokenBBQ ab (TokenBBQ rechnet neu, ccusage übernimmt die 0). Das kommt selten vor.
+
+---
+
+## 🟠 3. Robustheit: Nicht-numerische Token-Felder vergiften das Total mit `NaN`
+
+**Ort:** `src/loaders/claude.ts:42-44`
+
+```ts
+const input = Number(usage.input_tokens ?? 0);
+const output = Number(usage.output_tokens ?? 0);
+if (input === 0 && output === 0) return null;
+```
+
+**Divergenz:** `?? 0` fängt nur `null`/`undefined` ab. Ist `usage.input_tokens` ein nicht-numerischer String oder ein Objekt, liefert `Number(...)` den Wert `NaN`. Der Guard `input === 0 && output === 0` ist bei `NaN` `false` → das Event passiert mit `tokens.input = NaN`. `addTokens` propagiert das → **das gesamte Dashboard-Total wird `NaN`**. ccusage' valibot-`v.number()` lehnt solche Einträge hart ab.
+
+Verschärfend: `store.ts:isTokenCounts` prüft `typeof t.input === 'number'` — `NaN` ist `typeof 'number'`, läuft also durch und wird **dauerhaft persistiert** (Store-Cache vergiftet bis manueller Eingriff).
+
+**Fix:** Nach Konvertierung `Number.isFinite()` erzwingen für alle 4 Token-Felder (`const input = Number(usage.input_tokens); if (!Number.isFinite(input)) return null;`), zusätzlich in `store.ts:isTokenCounts` `Number.isFinite` statt nur `typeof === 'number'`.
+
+---
+
+## 🟡 4. Claude: Cache-only-Events werden verworfen
+
+**Ort:** `src/loaders/claude.ts:44` — `if (input === 0 && output === 0) return null;`
+
+**Divergenz:** ccusage' `usageDataSchema` verlangt nur, dass `input_tokens`/`output_tokens` Zahlen sind (0 erlaubt) und summiert `cache_creation`/`cache_read` trotzdem (`ccusage/apps/ccusage/src/data-loader.ts:441`). TokenBBQ wirft das ganze Event weg, *bevor* die Cache-Felder gelesen werden — Events mit `input=0, output=0, cacheRead>0` gehen verloren → Unterzählung von Tokens **und** Kosten. Selten, aber real.
+
+**Fix:** Guard erst nach Berechnung aller fünf Felder anwenden und nur verwerfen, wenn die Gesamtsumme 0 ist (oder ganz weglassen — ccusage filtert hier nicht).
+
+---
+
+## 🟡 5. Claude: Synthetischer Dedup-Fallback kollabiert echte Events
+
+**Ort:** `src/loaders/claude.ts:135-137`
+
+```ts
+const dedupeKey = requestId && messageId
+  ? `${messageId}:${requestId}`
+  : `${event.timestamp}:${event.model}:${event.tokens.input}:${event.tokens.output}`;
+```
+
+**Divergenz:** ccusage' `createUniqueHash` gibt `null` zurück, wenn messageId **oder** requestId fehlt — `isDuplicateEntry(null)` ist immer `false`, ID-lose Events werden **nie** dedupliziert (alle gezählt). TokenBBQ baut einen synthetischen Key und dedupliziert sie doch → potenzielle Unterzählung.
+
+**Zusatz:** Der Fallback-Key enthält **nur** `timestamp:model:input:output` — *nicht* `cacheCreation`, `cacheRead`, `costUSD` oder Session/Datei. Zwei ID-lose Events mit gleichem input/output aber unterschiedlichen Cache-Tokens kollidieren und eines wird fälschlich verworfen. Moderne Logs haben immer beide IDs (geringe Praxisrelevanz), aber für exakte ccusage-Parität: Fallback streichen und ID-lose Events wie ccusage immer durchzählen.
+
+---
+
+## 🟢 6. Architektur: Persistenter Store driftet vs. stateless ccusage
+
+**Ort:** `src/store.ts` (`hashEvent`, `appendEvents`) + `src/index.ts:117`
+
+**Divergenz:** ccusage ist **stateless** — liest bei jedem Lauf die JSONL-Dateien neu. TokenBBQ persistiert append-only und dedupliziert per **Content-Hash** (`source|sessionId|timestamp|model|input|output|cacheRead|cacheCreation|reasoning`) — ein *anderer* Schlüssel als ccusage' `messageId:requestId` (inkl. timestamp, ohne requestId). Folgen:
+
+1. **Drift nach Log-Rotation:** Löscht/rotiert der User alte Claude-JSONLs, zeigt ccusage weniger, TokenBBQ behält die Historie → TokenBBQ > ccusage. Bewusst so designt, bricht aber exakte Parität.
+2. Claude kommt im Dashboard aus `store.events` (nicht aus dem Frisch-Scan), der Store-Hash gewinnt. Kollidieren zwei legitim verschiedene Events im Content-Hash, ist eines **dauerhaft** weg (Hash bleibt in `state.hashes`, Re-Scan heilt nicht).
+
+**Empfehlung:** Design-Entscheidung dokumentieren („TokenBBQ ≥ ccusage nach Log-Pruning" = erwartet). Falls Bit-Parität gewünscht: optionaler „stateless/ccusage-compat"-Modus, der nur den Frisch-Scan ohne Store rendert.
+
+---
+
+## ✅ Was bereits korrekt mit ccusage übereinstimmt
+
+- **Claude-Total-Formel** `input+output+cacheCreation+cacheRead` = ccusage `getTotalTokens` (Reasoning bei Claude immer 0) — exakt gleich.
+- **Datums-Bucketing:** Beide lokale Zeitzone, `YYYY-MM-DD`. Identisch, solange ccusage ohne explizites `--timezone` läuft.
+- **Dedup bei vorhandenen IDs:** `messageId:requestId` — identisch zu ccusage.
+- **`isApiErrorMessage`:** ccusage filtert das **nicht** aus den Totals (nur für Reset-Time-Extraktion) — TokenBBQ ebenso. Kein Handlungsbedarf.
+- **`<synthetic>`-Modell:** Nur Anzeige-Divergenz (ccusage versteckt die Modellzeile, zählt Tokens mit). Totals unberührt — TokenBBQ zeigt zusätzlich eine `<synthetic>`-Zeile, kann `topModel` beeinflussen. Kosmetisch.
+- **Fehlendes `message.usage`:** Beide überspringen. Konsistent.
+
+---
+
+## Empfohlene Fix-Reihenfolge (Aufwand vs. Wirkung)
+
+1. **#1 Codex-Reasoning** — größte sichtbare Token-Abweichung, kleiner gezielter Fix.
+2. **#3 NaN-Guard** — echter Bug, billig, schützt zusätzlich den Store.
+3. **#2 Tiered Pricing** — größte Kosten-Abweichung, mittlerer Aufwand (`calculateTieredCost` portieren).
+4. **#4 + #5 Claude-Loader** — zusammen in `claude.ts` erledigbar (Guard nach hinten, Fallback-Key streichen).
+5. **#6** — Doku / optionaler Compat-Modus, kein dringender Code-Fix.
+
+---
+
+## Resolution (umgesetzt 2026-05-17, Branch `fix/ccusage-parity`, Codex-abgenommen)
+
+Alle 6 Findings behoben, je ein Commit, Design von Codex mit **PASS** abgenommen.
+
+| # | Umsetzung | Tests |
+|---|-----------|-------|
+| 1 | `totalTokenCount` ohne `reasoning` (types.ts); alle Client-Summen in dashboard.ts; Reasoning als nicht-additiver Info-Wert | `types.test.ts`, Codex-Paritäts-Invariante in `codex.test.ts` |
+| 2 | `calculateTieredCost` (faithful port, 200k, pro Token-Typ, pro Event) in `pricing.ts`; `ModelPricing` + FALLBACK erweitert | `pricing.test.ts` (Boundary + flat-fallback) |
+| 3 | `parseLine` verlangt finite `v.number()`-Parität; `store.ts` `isTokenCounts`/`loadFile` finite-gehärtet | `claude.test.ts`, `store.test.ts` |
+| 4 | Zero-Token-Drop entfernt (Cache-only & 0/0 bleiben wie bei ccusage) | `claude.test.ts` |
+| 5 | `dedupeKey=null` bei fehlender msgId/reqId, nie dedupliziert; Fallback entfernt; Loader-`CACHE_VERSION` 1→2 | `claude.test.ts` |
+| 6 | Store-Härtungs-Regressionstest + dokumentierte Invariante (keine Hash-Migration, keine neue Betriebsart — Userentscheidung) | `store.test.ts` |
+
+### Bewusst akzeptierte Rest-Divergenzen (von Codex bestätigt, vom User so entschieden)
+
+- **#1 Codex-Total-Quelle:** ccusage nutzt die gemeldete `total_tokens`; wir rekonstruieren `freshInput+cacheRead+output`. Da OpenAI `total_tokens ≡ input+output` definiert, sind sie für wohlgeformte Logs **gleich** (Regressionstest sichert das ab). Eine separate „reported total" durch das vereinheitlichte 5-Feld-Modell zu schleifen wäre invasiv — bewusst nicht umgesetzt.
+- **#6 Store vs. stateless:** Nach **manuellem** Log-Pruning behält TokenBBQ Historie (TokenBBQ ≥ ccusage) — gewollt. Keine kryptografische Kollisionsgarantie ohne Store-Hash-Migration — bewusst außerhalb des Scopes.
diff --git a/src/store.test.ts b/src/store.test.ts
index d6f0997..348476e 100644
--- a/src/store.test.ts
+++ b/src/store.test.ts
@@ -101,6 +101,33 @@ describe('loadStore', () => {
     assert.equal(state.events.length, 1);
   });
 
+  test('does NOT collapse distinct Claude turns (store-vs-ccusage parity guard)', () => {
+    // The Claude loader is the dedup authority and matches ccusage exactly
+    // (messageId:requestId, ID-less never deduped). The store adds a content
+    // hash only for multi-process safety. This guards the invariant that the
+    // content hash is injective for realistically-distinct Claude turns:
+    // every assistant turn has its own millisecond timestamp, so two genuine
+    // turns (even with identical token counts) must both survive — otherwise
+    // the dashboard would silently undercount vs ccusage.
+    const t1 = ev({
+      source: 'claude-code', sessionId: 'sess', model: 'claude-sonnet-4-20250514',
+      timestamp: '2026-05-01T10:00:00.000Z',
+      tokens: { input: 100, output: 50, cacheCreation: 0, cacheRead: 0, reasoning: 0 },
+    });
+    const t2 = ev({
+      source: 'claude-code', sessionId: 'sess', model: 'claude-sonnet-4-20250514',
+      timestamp: '2026-05-01T10:00:03.000Z', // 3s later — distinct turn
+      tokens: { input: 100, output: 50, cacheCreation: 0, cacheRead: 0, reasoning: 0 },
+    });
+    appendFileSync(
+      legacyPath(),
+      JSON.stringify({ v: 1, ...t1, eventHash: hashEvent(t1) }) + '\n' +
+      JSON.stringify({ v: 1, ...t2, eventHash: hashEvent(t2) }) + '\n',
+    );
+    const state = loadStore();
+    assert.equal(state.events.length, 2);
+  });
+
   test('rejects a NaN-poisoned token line (serializes to null on disk)', () => {
     const good = ev({ sessionId: 'ok' });
     const goodLine = JSON.stringify({ v: 1, ...good, eventHash: hashEvent(good) }) + '\n';
diff --git a/src/store.ts b/src/store.ts
index 3932a08..c6e5a4b 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -57,6 +57,21 @@ function getLegacyFilePath(): string {
   return path.join(getStoreDir(), 'events.ndjson');
 }
 
+// Store-vs-ccusage parity note:
+// ccusage is stateless — it re-reads the JSONL on every run. TokenBBQ persists
+// an append-only store. Source-level dedup is the LOADER's job and matches
+// ccusage exactly (Claude: messageId:requestId, ID-less never deduped). This
+// content hash exists ONLY for multi-process safety (two processes racing to
+// persist the same scanned event must collapse to one). It deliberately keeps
+// timestamp (ms) + sessionId, so it is injective for realistically-distinct
+// Claude turns and cannot drop an event ccusage would keep (see the
+// "does NOT collapse distinct Claude turns" store test).
+// Known, accepted residual: (1) after a user MANUALLY prunes/rotates Claude
+// JSONL, TokenBBQ retains history ccusage forgets (TokenBBQ >= ccusage) — this
+// is intended. (2) Two genuinely distinct Claude turns that are byte-identical
+// on (source,sessionId,timestamp-to-the-ms,model,all token fields) would still
+// collapse here; this does not occur in practice and a true cryptographic
+// guarantee would need a store-hash migration, deliberately out of scope.
 export function hashEvent(e: UnifiedTokenEvent): string {
   const payload = [
     e.source,