Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions migrations/021_llm_rag_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Speed up RAG / span-type analytics queries.
-- Composite index over (project_id, span_type, started_at DESC) on llm_spans.
-- IF NOT EXISTS keeps the migration safe to apply more than once.
CREATE INDEX IF NOT EXISTS idx_llm_spans_type
ON llm_spans(project_id, span_type, started_at DESC);
120 changes: 120 additions & 0 deletions src/dashboard.html
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,7 @@ <h2 class="insights-title">LLM Tracing</h2>
<button class="insights-tab" onclick="switchLlmTab('search')">Search</button>
<button class="insights-tab" onclick="switchLlmTab('prompts')">Prompts</button>
<button class="insights-tab" onclick="switchLlmTab('scores')">Scores</button>
<button class="insights-tab" onclick="switchLlmTab('rag')">RAG</button>
</div>
<div class="insights-content" id="llmContent">
<div class="insights-empty">Select a tab to view LLM data</div>
Expand Down Expand Up @@ -3164,6 +3165,7 @@ <h2 class="detail-title" id="detailTitle"></h2>
search: loadLlmSearch,
prompts: loadLlmPrompts,
scores: loadLlmScores,
rag: loadLlmRag,
};
if (loaders[tab]) loaders[tab]();
}
Expand Down Expand Up @@ -3429,6 +3431,21 @@ <h2 class="detail-title" id="detailTitle"></h2>
s.time_to_first_token_ms ? el('span', {}, 'TTFT: ' + s.time_to_first_token_ms + 'ms') : null,
].filter(Boolean)),
]);
// RAG metadata for retrieval spans
if (s.span_type === 'retrieval' && s.metadata) {
const m = s.metadata;
const hasRag = m['rag.chunks_retrieved'] != null || m['rag.source'] != null;
if (hasRag) {
const ragRow = el('div', { style: 'display:flex;gap:12px;font-size:10px;color:var(--text-dim);margin-top:6px;padding-top:6px;border-top:1px solid var(--border-subtle);' }, [
m['rag.source'] ? el('span', {}, 'Source: ' + m['rag.source']) : null,
m['rag.chunks_used'] != null && m['rag.chunks_retrieved'] != null ? el('span', {}, 'Chunks: ' + m['rag.chunks_used'] + '/' + m['rag.chunks_retrieved']) : null,
m['rag.context_tokens'] != null ? el('span', {}, 'Context: ' + m['rag.context_tokens'] + ' tokens') : null,
m['rag.max_context_tokens'] != null && m['rag.context_tokens'] != null ? el('span', {}, 'Utilization: ' + ((m['rag.context_tokens'] / m['rag.max_context_tokens']) * 100).toFixed(1) + '%') : null,
m['rag.relevance_scores'] && m['rag.relevance_scores'].length ? el('span', {}, 'Top relevance: ' + m['rag.relevance_scores'][0].toFixed(3)) : null,
].filter(Boolean));
card.appendChild(ragRow);
}
}
content.appendChild(card);
});
}
Expand Down Expand Up @@ -3542,6 +3559,109 @@ <h2 class="detail-title" id="detailTitle"></h2>
}
}

// ── LLM RAG Tab ──
/**
 * Load and render the "RAG" tab of the LLM Tracing panel into #llmContent.
 *
 * Fetches GET /v1/llm/rag for a 24-hour window (appending projectParam()
 * when it is non-empty) and renders, in order:
 *   1. four stat cards taken from `d.relevance`,
 *   2. a per-source table built from `d.sources` (when non-empty),
 *   3. a relevance summary card with a bar scaled to the average top score.
 *
 * An empty-state message is shown when there are no retrievals; a failure
 * message is shown when the request is rejected, returns a non-OK status,
 * or rendering throws.
 */
async function loadLlmRag() {
  const content = document.getElementById('llmContent');

  // Coerce a missing / null / non-finite JSON value to 0 so the numeric
  // formatting below can never throw on an unexpected payload.
  const num = (v) => (typeof v === 'number' && isFinite(v) ? v : 0);
  const fixed = (v, digits) => num(v).toFixed(digits);

  // Inline styles shared by several elements.
  const CARD = 'background:var(--surface-raised);border:1px solid var(--border);border-radius:8px;padding:12px;text-align:center;';
  const CARD_LABEL = 'font-size:10px;color:var(--text-dim);margin-top:2px;';
  const TH = 'padding:6px 8px;color:var(--text-dim);font-weight:500;border-bottom:1px solid var(--border);';
  const TD = 'padding:6px 8px;border-bottom:1px solid var(--border-subtle);';
  const TD_NUM = 'text-align:right;font-family:var(--mono);' + TD;

  // One stat card: a large value above a small label.
  const statCard = (value, label, valueColor) => el('div', { style: CARD }, [
    el('div', { style: 'font-size:18px;font-weight:700;color:' + (valueColor || 'var(--text)') + ';' }, value),
    el('div', { style: CARD_LABEL }, label),
  ]);

  try {
    const pp = projectParam();
    const r = await authFetch(API + '/v1/llm/rag?hours=24' + (pp ? '&' + pp : ''));
    // NOTE(review): a falsy result presumably means authFetch already dealt
    // with the failure (e.g. auth redirect) — confirm against authFetch.
    if (!r) return;
    // An error body has no `relevance`; without this check it would be
    // reported as "No RAG data in the last undefined hours".
    if (r.ok === false) throw new Error('RAG request failed with status ' + r.status);
    const d = (await r.json()) || {};
    content.innerHTML = '';

    const rel = d.relevance || {};
    const totalRetrievals = num(rel.total_retrievals);

    if (totalRetrievals === 0) {
      // Fall back to the requested window if the server did not echo one.
      const windowHours = num(d.window_hours) || 24;
      content.innerHTML = '<div class="insights-empty">No RAG data in the last ' + windowHours + ' hours</div>';
      return;
    }

    // Stat cards
    const chunkUtil = num(rel.chunk_utilization_pct);
    content.appendChild(el('div', { style: 'display:grid;grid-template-columns:repeat(4,1fr);gap:8px;margin-bottom:16px;' }, [
      statCard(String(totalRetrievals), 'Total Retrievals'),
      statCard(fixed(rel.avg_top_relevance, 3), 'Avg Top Relevance'),
      statCard(fixed(rel.avg_chunks_retrieved, 1), 'Avg Chunks Retrieved'),
      statCard(chunkUtil.toFixed(1) + '%', 'Chunk Utilization', chunkUtil > 80 ? 'var(--red-text)' : 'var(--text)'),
    ]));

    // Source table
    if (Array.isArray(d.sources) && d.sources.length) {
      const table = el('table', { style: 'width:100%;border-collapse:collapse;font-size:11px;' });
      const columns = [
        ['Name', 'left'],
        ['Source', 'left'],
        ['Calls', 'right'],
        ['Avg Latency', 'right'],
        ['P95', 'right'],
        ['Chunks', 'right'],
        ['Ctx Tokens', 'right'],
        ['Utilization', 'right'],
        ['Error Rate', 'right'],
      ];
      table.appendChild(el('tr', {}, columns.map(([label, align]) =>
        el('th', { style: 'text-align:' + align + ';' + TH }, label))));

      d.sources.forEach((s) => {
        const utilPct = num(s.avg_context_utilization_pct);
        const errorRate = num(s.error_rate);
        table.appendChild(el('tr', {}, [
          el('td', { style: TD + 'color:var(--text);font-weight:600;' }, s.retrieval_name || '-'),
          el('td', { style: TD + 'color:var(--text-dim);' }, s.source || '-'),
          el('td', { style: TD_NUM + 'color:var(--text);' }, String(num(s.call_count))),
          el('td', { style: TD_NUM + 'color:var(--text);' }, fixed(s.avg_latency_ms, 0) + 'ms'),
          el('td', { style: TD_NUM + 'color:var(--text);' }, fixed(s.p95_latency_ms, 0) + 'ms'),
          el('td', { style: TD_NUM + 'color:var(--text);' }, fixed(s.avg_chunks_retrieved, 1)),
          el('td', { style: TD_NUM + 'color:var(--text);' }, fixed(s.avg_context_tokens, 0)),
          // Highlight sources whose context utilization exceeds 80%.
          el('td', { style: TD_NUM + 'color:' + (utilPct > 80 ? 'var(--red-text)' : 'var(--text)') + ';' }, utilPct.toFixed(1) + '%'),
          // Any non-zero error rate is highlighted.
          el('td', { style: TD_NUM + (errorRate > 0 ? 'color:var(--red-text);' : 'color:var(--text-dim);') }, errorRate.toFixed(1) + '%'),
        ]));
      });
      content.appendChild(table);
    }

    // Relevance summary bar. Scores are treated as fractions: the average is
    // scaled to a percentage and clamped to [0, 100] for the bar width.
    const pct = Math.max(0, Math.min(100, num(rel.avg_top_relevance) * 100));
    const barColor = pct >= 70 ? 'var(--green, #4ade80)' : pct >= 40 ? 'var(--yellow, #facc15)' : 'var(--red-text, #f87171)';
    const bar = el('div', { style: 'width:100%;height:8px;background:var(--border);border-radius:4px;overflow:hidden;' });
    bar.appendChild(el('div', { style: 'height:100%;width:' + pct + '%;background:' + barColor + ';border-radius:4px;' }));

    content.appendChild(el('div', { style: 'background:var(--surface-raised);border:1px solid var(--border);border-radius:8px;padding:14px;margin-top:12px;' }, [
      el('div', { style: 'font-size:11px;color:var(--text-dim);text-transform:uppercase;letter-spacing:0.05em;margin-bottom:8px;' }, 'Relevance Distribution'),
      el('div', { style: 'display:flex;justify-content:space-between;font-size:10px;color:var(--text-dim);margin-bottom:4px;' }, [
        el('span', {}, 'Avg: ' + fixed(rel.avg_top_relevance, 3)),
        el('span', {}, 'Min: ' + fixed(rel.min_top_relevance, 3) + ' / Max: ' + fixed(rel.max_top_relevance, 3)),
      ]),
      bar,
      el('div', { style: 'display:flex;gap:16px;font-size:10px;color:var(--text-dim);margin-top:8px;' }, [
        el('span', {}, 'Avg chunks retrieved: ' + fixed(rel.avg_chunks_retrieved, 1)),
        el('span', {}, 'Avg chunks used: ' + fixed(rel.avg_chunks_used, 1)),
        el('span', {}, 'Chunk utilization: ' + chunkUtil.toFixed(1) + '%'),
      ]),
    ]));
  } catch (e) {
    content.innerHTML = '<div class="insights-empty">Failed to load RAG data</div>';
  }
}

// ── LLM Scores Tab ──
async function loadLlmScores() {
const content = document.getElementById('llmContent');
Expand Down
85 changes: 85 additions & 0 deletions src/llm_tracing/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,88 @@ GROUP BY prompt_name
ORDER BY total_traces DESC
LIMIT $3
"#;

/// RAG source overview: per-source aggregates for retrieval spans with rag.* metadata.
///
/// One row per `(name, rag.source)` group, restricted to spans with
/// `span_type = 'retrieval'` whose metadata contains `rag.chunks_retrieved`.
/// A NULL `name` is reported as `''` and a missing `rag.source` as `'unknown'`.
///
/// Output columns, in order: `retrieval_name`, `source`, `call_count`,
/// `avg_latency_ms`, `p50_latency_ms`, `p95_latency_ms`, `error_count`,
/// `error_rate` (percentage of spans with `status = 'error'`),
/// `avg_chunks_retrieved`, `avg_chunks_used`, `avg_context_tokens`,
/// `avg_context_utilization_pct` (mean of `context_tokens * 100 / max_context_tokens`,
/// skipping spans where `rag.max_context_tokens` is not greater than zero).
/// Aggregates with no contributing values are coalesced to 0.
/// Rows are ordered by `call_count` descending.
///
/// Parameters: $1 = since_ms, $2 = project_id (or NULL), $3 = limit
pub const RAG_OVERVIEW_SQL: &str = r#"
SELECT
COALESCE(name, '') AS retrieval_name,
COALESCE(json_extract_string(metadata, '$."rag.source"'), 'unknown') AS source,
COUNT(*) AS call_count,
COALESCE(AVG(latency_ms), 0) AS avg_latency_ms,
COALESCE(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY latency_ms), 0) AS p50_latency_ms,
COALESCE(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY latency_ms), 0) AS p95_latency_ms,
SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS error_count,
CASE WHEN COUNT(*) > 0
THEN (SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) * 100.0 / COUNT(*))
ELSE 0.0 END AS error_rate,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.chunks_retrieved"') AS DOUBLE)), 0) AS avg_chunks_retrieved,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.chunks_used"') AS DOUBLE)), 0) AS avg_chunks_used,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.context_tokens"') AS DOUBLE)), 0) AS avg_context_tokens,
COALESCE(AVG(
CASE WHEN CAST(json_extract(metadata, '$."rag.max_context_tokens"') AS DOUBLE) > 0
THEN CAST(json_extract(metadata, '$."rag.context_tokens"') AS DOUBLE) * 100.0
/ CAST(json_extract(metadata, '$."rag.max_context_tokens"') AS DOUBLE)
ELSE NULL END
), 0) AS avg_context_utilization_pct
FROM bloop.llm_spans
WHERE started_at >= $1
AND ($2 IS NULL OR project_id = $2)
AND span_type = 'retrieval'
AND metadata IS NOT NULL
AND json_extract(metadata, '$."rag.chunks_retrieved"') IS NOT NULL
GROUP BY name, json_extract_string(metadata, '$."rag.source"')
ORDER BY call_count DESC
LIMIT $3
"#;

/// RAG hourly metrics: time-series for retrieval span activity.
///
/// One row per hour bucket, restricted to spans with `span_type = 'retrieval'`
/// whose metadata contains `rag.chunks_retrieved`, newest bucket first.
///
/// Output columns, in order: `hour_bucket`, `retrieval_count`,
/// `avg_latency_ms`, `error_count`, `avg_chunks_retrieved`,
/// `avg_context_tokens`, `avg_context_utilization_pct`, `avg_top_k`.
/// Aggregates with no contributing values are coalesced to 0.
///
/// The bucket is computed with `//` (integer division). In DuckDB the plain
/// `/` operator is floating-point division, so `(started_at / 3600000) * 3600000`
/// would just reproduce `started_at` and put every distinct timestamp in its
/// own "bucket"; `//` truncates to the start of the hour and keeps the column
/// an integer.
///
/// Parameters: $1 = since_ms, $2 = project_id (or NULL)
pub const RAG_METRICS_SQL: &str = r#"
SELECT
(started_at // 3600000) * 3600000 AS hour_bucket,
COUNT(*) AS retrieval_count,
COALESCE(AVG(latency_ms), 0) AS avg_latency_ms,
SUM(CASE WHEN status = 'error' THEN 1 ELSE 0 END) AS error_count,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.chunks_retrieved"') AS DOUBLE)), 0) AS avg_chunks_retrieved,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.context_tokens"') AS DOUBLE)), 0) AS avg_context_tokens,
COALESCE(AVG(
CASE WHEN CAST(json_extract(metadata, '$."rag.max_context_tokens"') AS DOUBLE) > 0
THEN CAST(json_extract(metadata, '$."rag.context_tokens"') AS DOUBLE) * 100.0
/ CAST(json_extract(metadata, '$."rag.max_context_tokens"') AS DOUBLE)
ELSE NULL END
), 0) AS avg_context_utilization_pct,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.top_k"') AS DOUBLE)), 0) AS avg_top_k
FROM bloop.llm_spans
WHERE started_at >= $1
AND ($2 IS NULL OR project_id = $2)
AND span_type = 'retrieval'
AND metadata IS NOT NULL
AND json_extract(metadata, '$."rag.chunks_retrieved"') IS NOT NULL
GROUP BY (started_at // 3600000) * 3600000
ORDER BY hour_bucket DESC
"#;

/// RAG relevance summary: aggregate relevance score stats across retrieval spans.
///
/// Always yields a single row (there is no GROUP BY) over spans with
/// `span_type = 'retrieval'` whose metadata contains `rag.chunks_retrieved`.
///
/// Output columns, in order: `total_retrievals`, `avg_top_relevance`,
/// `min_top_relevance`, `max_top_relevance`, `avg_chunks_retrieved`,
/// `avg_chunks_used`, `chunk_utilization_pct` (mean of
/// `chunks_used * 100 / chunks_retrieved`, skipping spans where
/// `rag.chunks_retrieved` is not greater than zero).
/// Aggregates with no contributing values are coalesced to 0.
///
/// "Top relevance" is element `[0]` of `rag.relevance_scores`.
/// NOTE(review): this presumably assumes producers emit scores best-first — confirm.
/// `total_retrievals` counts every matching span, including spans that carry
/// no `rag.relevance_scores`.
///
/// Parameters: $1 = since_ms, $2 = project_id (or NULL)
pub const RAG_RELEVANCE_SQL: &str = r#"
SELECT
COUNT(*) AS total_retrievals,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.relevance_scores"[0]') AS DOUBLE)), 0) AS avg_top_relevance,
COALESCE(MIN(CAST(json_extract(metadata, '$."rag.relevance_scores"[0]') AS DOUBLE)), 0) AS min_top_relevance,
COALESCE(MAX(CAST(json_extract(metadata, '$."rag.relevance_scores"[0]') AS DOUBLE)), 0) AS max_top_relevance,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.chunks_retrieved"') AS DOUBLE)), 0) AS avg_chunks_retrieved,
COALESCE(AVG(CAST(json_extract(metadata, '$."rag.chunks_used"') AS DOUBLE)), 0) AS avg_chunks_used,
COALESCE(AVG(
CASE WHEN CAST(json_extract(metadata, '$."rag.chunks_retrieved"') AS DOUBLE) > 0
THEN CAST(json_extract(metadata, '$."rag.chunks_used"') AS DOUBLE) * 100.0
/ CAST(json_extract(metadata, '$."rag.chunks_retrieved"') AS DOUBLE)
ELSE NULL END
), 0) AS chunk_utilization_pct
FROM bloop.llm_spans
WHERE started_at >= $1
AND ($2 IS NULL OR project_id = $2)
AND span_type = 'retrieval'
AND metadata IS NOT NULL
AND json_extract(metadata, '$."rag.chunks_retrieved"') IS NOT NULL
"#;
91 changes: 91 additions & 0 deletions src/llm_tracing/query_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -858,3 +858,94 @@ pub async fn update_settings(
content_storage: policy.as_str().to_string(),
}))
}

/// GET /v1/llm/rag
///
/// Builds the RAG analytics payload for the requested window by running three
/// queries one after another: per-source aggregates
/// (`query::RAG_OVERVIEW_SQL`), hourly metrics (`query::RAG_METRICS_SQL`) and
/// the single-row relevance summary (`query::RAG_RELEVANCE_SQL`).
///
/// The project is resolved through `resolve_project` before any query runs.
/// Any query failure is surfaced as `AppError::LlmTracing`.
pub async fn rag(
    State(state): State<Arc<LlmQueryState>>,
    token_auth: Option<axum::Extension<TokenAuth>>,
    Query(mut qp): Query<LlmQueryParams>,
) -> AppResult<Json<RagResponse>> {
    qp.project_id = resolve_project(&token_auth, qp.project_id)?;

    let hours = qp.hours();
    let limit = qp.limit();
    // Lower bound of the window: `hours` hours before now, in milliseconds.
    let window_ms = hours * 3_600_000;
    let since = chrono::Utc::now().timestamp_millis() - window_ms;

    // Per-source aggregates.
    let sources = {
        let project = qp.project_id.clone();
        let outcome = state
            .conn
            .query(move |conn: &duckdb::Connection| {
                let mut stmt = conn.prepare(query::RAG_OVERVIEW_SQL)?;
                let rows = stmt.query_map(params![since, project, limit], |row| {
                    // Column positions follow the SELECT list of RAG_OVERVIEW_SQL.
                    let retrieval_name = row.get(0)?;
                    let source = row.get(1)?;
                    let call_count = row.get(2)?;
                    let avg_latency_ms = row.get(3)?;
                    let p50_latency_ms = row.get(4)?;
                    let p95_latency_ms = row.get(5)?;
                    let error_count = row.get(6)?;
                    let error_rate = row.get(7)?;
                    let avg_chunks_retrieved = row.get(8)?;
                    let avg_chunks_used = row.get(9)?;
                    let avg_context_tokens = row.get(10)?;
                    let avg_context_utilization_pct = row.get(11)?;
                    Ok(RagSourceEntry {
                        retrieval_name,
                        source,
                        call_count,
                        avg_latency_ms,
                        p50_latency_ms,
                        p95_latency_ms,
                        error_count,
                        error_rate,
                        avg_chunks_retrieved,
                        avg_chunks_used,
                        avg_context_tokens,
                        avg_context_utilization_pct,
                    })
                })?;
                rows.collect::<Result<Vec<_>, _>>()
            })
            .await;
        outcome.map_err(AppError::LlmTracing)?
    };

    // Hourly time series.
    let metrics = {
        let project = qp.project_id.clone();
        let outcome = state
            .conn
            .query(move |conn: &duckdb::Connection| {
                let mut stmt = conn.prepare(query::RAG_METRICS_SQL)?;
                let rows = stmt.query_map(params![since, project], |row| {
                    // Column positions follow the SELECT list of RAG_METRICS_SQL.
                    let hour_bucket = row.get(0)?;
                    let retrieval_count = row.get(1)?;
                    let avg_latency_ms = row.get(2)?;
                    let error_count = row.get(3)?;
                    let avg_chunks_retrieved = row.get(4)?;
                    let avg_context_tokens = row.get(5)?;
                    let avg_context_utilization_pct = row.get(6)?;
                    let avg_top_k = row.get(7)?;
                    Ok(RagMetricsEntry {
                        hour_bucket,
                        retrieval_count,
                        avg_latency_ms,
                        error_count,
                        avg_chunks_retrieved,
                        avg_context_tokens,
                        avg_context_utilization_pct,
                        avg_top_k,
                    })
                })?;
                rows.collect::<Result<Vec<_>, _>>()
            })
            .await;
        outcome.map_err(AppError::LlmTracing)?
    };

    // Single-row relevance summary.
    let relevance = {
        let project = qp.project_id.clone();
        let outcome = state
            .conn
            .query(move |conn: &duckdb::Connection| {
                let mut stmt = conn.prepare(query::RAG_RELEVANCE_SQL)?;
                stmt.query_row(params![since, project], |row| {
                    // Column positions follow the SELECT list of RAG_RELEVANCE_SQL.
                    let total_retrievals = row.get(0)?;
                    let avg_top_relevance = row.get(1)?;
                    let min_top_relevance = row.get(2)?;
                    let max_top_relevance = row.get(3)?;
                    let avg_chunks_retrieved = row.get(4)?;
                    let avg_chunks_used = row.get(5)?;
                    let chunk_utilization_pct = row.get(6)?;
                    Ok(RagRelevanceSummary {
                        total_retrievals,
                        avg_top_relevance,
                        min_top_relevance,
                        max_top_relevance,
                        avg_chunks_retrieved,
                        avg_chunks_used,
                        chunk_utilization_pct,
                    })
                })
            })
            .await;
        outcome.map_err(AppError::LlmTracing)?
    };

    let body = RagResponse {
        sources,
        metrics,
        relevance,
        window_hours: hours,
    };
    Ok(Json(body))
}
49 changes: 49 additions & 0 deletions src/llm_tracing/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -583,3 +583,52 @@ pub struct BudgetResponse {
pub days_elapsed: i64,
pub days_remaining: i64,
}

// ── RAG Types ──

/// One row of the per-source RAG overview, as selected by `RAG_OVERVIEW_SQL`.
///
/// Fields are listed in the same order as the query's output columns.
#[derive(Debug, Serialize)]
pub struct RagSourceEntry {
    /// Span name of the retrieval; empty string when the span had no name.
    pub retrieval_name: String,
    /// Value of the `rag.source` metadata key, or `"unknown"` when absent.
    pub source: String,
    /// Number of retrieval spans in this group.
    pub call_count: i64,
    /// Mean of `latency_ms` over the group.
    pub avg_latency_ms: f64,
    /// Median (continuous 50th percentile) of `latency_ms`.
    pub p50_latency_ms: f64,
    /// Continuous 95th percentile of `latency_ms`.
    pub p95_latency_ms: f64,
    /// Number of spans in the group with `status = 'error'`.
    pub error_count: i64,
    /// `error_count` as a percentage of `call_count` (0–100).
    pub error_rate: f64,
    /// Mean of the `rag.chunks_retrieved` metadata value.
    pub avg_chunks_retrieved: f64,
    /// Mean of the `rag.chunks_used` metadata value.
    pub avg_chunks_used: f64,
    /// Mean of the `rag.context_tokens` metadata value.
    pub avg_context_tokens: f64,
    /// Mean of `rag.context_tokens * 100 / rag.max_context_tokens`, as a percentage.
    pub avg_context_utilization_pct: f64,
}

/// One hourly bucket of retrieval activity, as selected by `RAG_METRICS_SQL`.
///
/// Fields are listed in the same order as the query's output columns.
#[derive(Debug, Serialize)]
pub struct RagMetricsEntry {
    /// Bucket key the query derives from `started_at` in 3 600 000-unit steps.
    pub hour_bucket: i64,
    /// Number of retrieval spans in the bucket.
    pub retrieval_count: i64,
    /// Mean of `latency_ms` over the bucket.
    pub avg_latency_ms: f64,
    /// Number of spans in the bucket with `status = 'error'`.
    pub error_count: i64,
    /// Mean of the `rag.chunks_retrieved` metadata value.
    pub avg_chunks_retrieved: f64,
    /// Mean of the `rag.context_tokens` metadata value.
    pub avg_context_tokens: f64,
    /// Mean of `rag.context_tokens * 100 / rag.max_context_tokens`, as a percentage.
    pub avg_context_utilization_pct: f64,
    /// Mean of the `rag.top_k` metadata value.
    pub avg_top_k: f64,
}

/// Window-wide relevance and chunk statistics, as selected by `RAG_RELEVANCE_SQL`.
///
/// "Top relevance" refers to the first element of a span's
/// `rag.relevance_scores` metadata array.
#[derive(Debug, Serialize)]
pub struct RagRelevanceSummary {
    /// Number of retrieval spans that matched the query's filters.
    pub total_retrievals: i64,
    /// Mean of the first relevance score per span (0 when none is present).
    pub avg_top_relevance: f64,
    /// Smallest first relevance score (0 when none is present).
    pub min_top_relevance: f64,
    /// Largest first relevance score (0 when none is present).
    pub max_top_relevance: f64,
    /// Mean of the `rag.chunks_retrieved` metadata value.
    pub avg_chunks_retrieved: f64,
    /// Mean of the `rag.chunks_used` metadata value.
    pub avg_chunks_used: f64,
    /// Mean of `rag.chunks_used * 100 / rag.chunks_retrieved`, as a percentage.
    pub chunk_utilization_pct: f64,
}

/// JSON body returned by `GET /v1/llm/rag`.
#[derive(Debug, Serialize)]
pub struct RagResponse {
    /// Per-source aggregates.
    pub sources: Vec<RagSourceEntry>,
    /// Hourly time series of retrieval activity.
    pub metrics: Vec<RagMetricsEntry>,
    /// Window-wide relevance and chunk statistics.
    pub relevance: RagRelevanceSummary,
    /// Size of the queried window in hours, echoed from the request parameters.
    pub window_hours: i64,
}
Loading
Loading