Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion brain-bar/Sources/BrainBar/BrainDatabase.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4548,13 +4548,61 @@ final class BrainDatabase: @unchecked Sendable {
"chunk_ids": columnText(stmt, 4) as Any,
"token_count": Int(sqlite3_column_int(stmt, 5))
]
if let event = try? InjectionEvent(row: row) {
if var event = try? InjectionEvent(row: row) {
let details = (try? injectionChunkDetails(ids: event.chunkIDs)) ?? []
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Batch chunk detail reads instead of querying per injection event

listInjectionEvents now calls injectionChunkDetails inside the row loop, which turns one feed refresh into N+1 SQL statements (1 + event_count). In this repo, InjectionStore polls every 250ms with a default limit of 50 events, so this path can execute ~51 queries per refresh and significantly increase read-path cost and lock pressure on the shared DB during normal UI use. Fetching all needed chunk IDs in one batched query would avoid this regression.

Useful? React with 👍 / 👎.

event = InjectionEvent(
id: event.id,
sessionID: event.sessionID,
timestamp: event.timestamp,
query: event.query,
chunkIDs: event.chunkIDs,
tokenCount: event.tokenCount,
chunks: details
)
events.append(event)
}
}
return events
}

/// Loads chunk detail rows for the given chunk IDs.
///
/// IDs that are empty or whitespace-only are ignored. The result follows the
/// order of `ids`; IDs with no matching row in `chunks` are omitted.
///
/// The lookup is split into fixed-size batches so that a single statement
/// never binds more parameters than SQLite allows (`SQLITE_MAX_VARIABLE_NUMBER`,
/// 999 by default before SQLite 3.32.0). A single oversized `IN (...)` would
/// make `sqlite3_prepare_v2` fail and lose every chunk for the event.
///
/// - Parameter ids: Chunk IDs to look up, in the order results should be returned.
/// - Returns: The chunk details that were found, ordered like `ids`.
/// - Throws: `DBError.notOpen` when there is no open database handle, or
///   `DBError.prepare` when a batch statement cannot be prepared.
private func injectionChunkDetails(ids: [String]) throws -> [InjectionChunk] {
    guard let db else { throw DBError.notOpen }
    let orderedIDs = ids.filter { !$0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }
    guard !orderedIDs.isEmpty else { return [] }

    // Kept well below the historical 999-variable default so the query works
    // on older SQLite builds as well as current ones.
    let maxIDsPerQuery = 500
    var detailsByID: [String: InjectionChunk] = [:]

    for batchStart in stride(from: 0, to: orderedIDs.count, by: maxIDsPerQuery) {
        let batchEnd = min(batchStart + maxIDsPerQuery, orderedIDs.count)
        let batch = orderedIDs[batchStart..<batchEnd]

        let placeholders = Array(repeating: "?", count: batch.count).joined(separator: ",")
        let sql = """
        SELECT id, content, summary, source, source_file, tags, content_type
        FROM chunks
        WHERE id IN (\(placeholders))
        """
        var stmt: OpaquePointer?
        guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else {
            throw DBError.prepare(sqlite3_errcode(db))
        }
        // Runs at the end of each loop iteration, so every batch statement is
        // finalized before the next one is prepared.
        defer { sqlite3_finalize(stmt) }

        // SQLite parameter indexes are 1-based; `enumerated()` offsets restart
        // at 0 for each batch slice.
        for (offset, id) in batch.enumerated() {
            bindText(id, to: stmt, index: Int32(offset + 1))
        }

        while sqlite3_step(stmt) == SQLITE_ROW {
            let row: [String: Any] = [
                "id": columnText(stmt, 0) as Any,
                "content": columnText(stmt, 1) as Any,
                "summary": columnText(stmt, 2) as Any,
                "source": columnText(stmt, 3) as Any,
                "source_file": columnText(stmt, 4) as Any,
                "tags": columnText(stmt, 5) as Any,
                "content_type": columnText(stmt, 6) as Any,
            ]
            let detail = InjectionChunk(row: row)
            detailsByID[detail.id] = detail
        }
    }

    // Re-project onto the caller's ordering; unknown IDs simply drop out.
    return orderedIDs.compactMap { detailsByID[$0] }
}
Comment on lines +4568 to +4604
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

SQL variable limit could silently drop chunk metadata for high-cardinality events.

SQLite's default SQLITE_MAX_VARIABLE_NUMBER is 999 in versions before 3.32.0 (32766 from 3.32.0 onward). For events with more chunkIDs than that limit, sqlite3_prepare_v2 will fail and the caller's try? fallback returns empty chunks—silently degrading the event.

Consider batching queries (e.g., 500 IDs per batch) to avoid hitting the limit.

♻️ Suggested batching approach
 private func injectionChunkDetails(ids: [String]) throws -> [InjectionChunk] {
     guard let db else { throw DBError.notOpen }
     let orderedIDs = ids.filter { !$0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty }
     guard !orderedIDs.isEmpty else { return [] }

+    let batchSize = 500
+    var detailsByID: [String: InjectionChunk] = [:]
+
+    for batchStart in stride(from: 0, to: orderedIDs.count, by: batchSize) {
+        let batchEnd = min(batchStart + batchSize, orderedIDs.count)
+        let batch = Array(orderedIDs[batchStart..<batchEnd])
+        let placeholders = Array(repeating: "?", count: batch.count).joined(separator: ",")
+        let sql = """
+            SELECT id, content, summary, source, source_file, tags, content_type
+            FROM chunks
+            WHERE id IN (\(placeholders))
+        """
+        var stmt: OpaquePointer?
+        guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else {
+            throw DBError.prepare(sqlite3_errcode(db))
+        }
+        defer { sqlite3_finalize(stmt) }
+
+        for (offset, id) in batch.enumerated() {
+            bindText(id, to: stmt, index: Int32(offset + 1))
+        }
+
+        while sqlite3_step(stmt) == SQLITE_ROW {
+            let row: [String: Any] = [
+                "id": columnText(stmt, 0) as Any,
+                "content": columnText(stmt, 1) as Any,
+                "summary": columnText(stmt, 2) as Any,
+                "source": columnText(stmt, 3) as Any,
+                "source_file": columnText(stmt, 4) as Any,
+                "tags": columnText(stmt, 5) as Any,
+                "content_type": columnText(stmt, 6) as Any,
+            ]
+            let detail = InjectionChunk(row: row)
+            detailsByID[detail.id] = detail
+        }
+    }
+
+    return orderedIDs.compactMap { detailsByID[$0] }
-    let placeholders = Array(repeating: "?", count: orderedIDs.count).joined(separator: ",")
-    // ... rest of current implementation
 }
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@brain-bar/Sources/BrainBar/BrainDatabase.swift` around lines 4568 - 4604,
injectionChunkDetails currently builds one IN(...) statement which will fail
silently for large id arrays due to SQLite's SQLITE_MAX_VARIABLE_NUMBER limit;
modify injectionChunkDetails to batch orderedIDs into smaller chunks (e.g., 500
IDs per batch) and for each batch build the placeholders, call
sqlite3_prepare_v2, bindText for that batch, step through rows to populate
detailsByID, finalize the statement, then continue with the next batch; after
all batches are processed, return orderedIDs.compactMap { detailsByID[$0] } so
order is preserved and no metadata is dropped when the variable limit is
reached.


// MARK: - brain_digest: rule-based entity extraction

func digest(content: String) throws -> [String: Any] {
Expand Down
263 changes: 261 additions & 2 deletions brain-bar/Sources/BrainBar/InjectionEvent.swift
Comment thread
macroscopeapp[bot] marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -1,15 +1,266 @@
import Foundation

/// Detail record for a single chunk referenced by an injection event.
struct InjectionChunk: Equatable, Sendable, Identifiable {
    let id: String
    let content: String
    let summary: String
    let source: String
    let sourceFile: String
    let tags: [String]
    let contentType: String

    /// Short single-line text for display: the summary when it contains any
    /// non-whitespace characters, otherwise the content, elided to 80 characters.
    var displayText: String {
        let summaryIsBlank = summary.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
        let chosen: String
        if summaryIsBlank {
            chosen = content
        } else {
            chosen = summary
        }
        return Self.elide(chosen, limit: 80)
    }

    /// Category of this chunk as computed by `InjectionKind.classify`.
    var kind: InjectionKind {
        return InjectionKind.classify(
            source: source,
            sourceFile: sourceFile,
            tags: tags,
            content: content
        )
    }

    /// Creates a chunk from explicit field values.
    init(
        id: String,
        content: String,
        summary: String,
        source: String,
        sourceFile: String,
        tags: [String],
        contentType: String
    ) {
        self.id = id
        self.content = content
        self.summary = summary
        self.source = source
        self.sourceFile = sourceFile
        self.tags = tags
        self.contentType = contentType
    }

    /// Creates a chunk from a loosely typed row dictionary.
    ///
    /// String fields that are missing or not a `String` become `""`;
    /// `tags` is decoded by `decodeTags`.
    init(row: [String: Any]) {
        let stringField: (String) -> String = { key in
            (row[key] as? String) ?? ""
        }
        id = stringField("id")
        content = stringField("content")
        summary = stringField("summary")
        source = stringField("source")
        sourceFile = stringField("source_file")
        contentType = stringField("content_type")
        tags = InjectionChunk.decodeTags(row["tags"])
    }

    /// Collapses all runs of whitespace/newlines in `text` to single spaces and,
    /// when the result is longer than `limit` characters, cuts it to
    /// `max(limit - 1, 1)` characters followed by an ellipsis.
    static func elide(_ text: String, limit: Int) -> String {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        let words = trimmed
            .components(separatedBy: .whitespacesAndNewlines)
            .filter { !$0.isEmpty }
        let singleLine = words.joined(separator: " ")

        if singleLine.count <= limit {
            return singleLine
        }
        let keptCount = max(limit - 1, 1)
        return String(singleLine.prefix(keptCount)) + "…"
    }

    /// Accepts tags either as a ready `[String]` or as a JSON-encoded string
    /// array; anything else yields an empty list.
    private static func decodeTags(_ value: Any?) -> [String] {
        if let ready = value as? [String] {
            return ready
        }
        guard let json = value as? String, let bytes = json.data(using: .utf8) else {
            return []
        }
        let parsed = try? JSONSerialization.jsonObject(with: bytes)
        return (parsed as? [String]) ?? []
    }
}

/// Category assigned to an injected chunk, used for glyphs, labels and filtering.
enum InjectionKind: String, CaseIterable, Equatable, Sendable {
    case memoryCheckpoint
    case realtimeCapture
    case storedMemory
    case dailyDigest
    case videoKnowledge
    case chat
    case toolSession
    case quickCapture
    case checkpoint
    case other

    /// Emoji shown for this kind.
    var glyph: String {
        switch self {
        case .memoryCheckpoint: return "🧠"
        case .realtimeCapture: return "💬"
        case .storedMemory: return "📝"
        case .dailyDigest: return "🌅"
        case .videoKnowledge: return "🎬"
        case .chat: return "📱"
        case .toolSession: return "🛠"
        case .quickCapture: return "⚡"
        case .checkpoint: return "🏷"
        case .other: return "📄"
        }
    }

    /// Human-readable name of this kind.
    var label: String {
        switch self {
        case .memoryCheckpoint: return "Memory Checkpoint"
        case .realtimeCapture: return "Realtime Capture"
        case .storedMemory: return "Stored Memory"
        case .dailyDigest: return "Daily Digest"
        case .videoKnowledge: return "Video Knowledge"
        case .chat: return "Chat"
        case .toolSession: return "Tool Session"
        case .quickCapture: return "Quick Capture"
        case .checkpoint: return "Checkpoint"
        case .other: return "Other"
        }
    }

    /// Title used for the detail modal. Both checkpoint flavours share the
    /// "Memory Checkpoint" title; every other kind uses its `label`.
    var modalTitle: String {
        switch self {
        case .memoryCheckpoint, .checkpoint:
            return "Memory Checkpoint"
        default:
            return label
        }
    }

    /// Position of this case in `allCases`, usable as a stable colour index.
    var paletteIndex: Int {
        Self.allCases.firstIndex(of: self) ?? 0
    }

    /// Derives the kind of a chunk from its provenance fields.
    ///
    /// Matching is case-insensitive; `source`, `sourceFile` and `content` are
    /// trimmed of surrounding whitespace first. Rules are evaluated in order:
    /// 1. a tag containing `pr-merge`, or content starting with `[checkpoint]`,
    ///    yields `.checkpoint`;
    /// 2. source `precompact-hook`, or a source file starting with `precompact:`,
    ///    yields `.memoryCheckpoint`;
    /// 3. otherwise the source name selects the kind, falling back to `.other`.
    ///
    /// - Parameters:
    ///   - source: Origin identifier stored on the chunk.
    ///   - sourceFile: Source file reference stored on the chunk.
    ///   - tags: Tags attached to the chunk.
    ///   - content: Chunk body text.
    /// - Returns: The kind for the chunk.
    static func classify(source: String, sourceFile: String, tags: [String], content: String) -> InjectionKind {
        let normalizedSource = source.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        let normalizedFile = sourceFile.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
        let normalizedTags = tags.map { $0.lowercased() }
        let normalizedContent = content.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()

        // Checkpoint markers win over any source-based classification.
        if normalizedTags.contains(where: { $0.contains("pr-merge") }) ||
            normalizedContent.hasPrefix("[checkpoint]") {
            return .checkpoint
        }

        if normalizedSource == "precompact-hook" || normalizedFile.hasPrefix("precompact:") {
            return .memoryCheckpoint
        }

        switch normalizedSource {
        case "realtime_watcher", "claude_code":
            return .realtimeCapture
        case "mcp", "brain_store", "manual":
            return .storedMemory
        case "digest":
            return .dailyDigest
        case "youtube":
            return .videoKnowledge
        case "whatsapp", "telegram":
            // `telegram` is a chat source too; without it those chunks were
            // filed under `.other` and vanished from the Chat filter.
            return .chat
        case "cursor", "cursor_cli", "codex", "codex_cli", "gemini_cli":
            // The `*_cli` names are the CLI variants of the tool sources; they
            // previously fell through to `.other`.
            return .toolSession
        case "quick-capture":
            return .quickCapture
        default:
            return .other
        }
    }
}

/// Filter chip choices for the injection feed; each filter accepts a set of
/// `InjectionKind` values.
enum InjectionTypeFilter: String, CaseIterable, Equatable, Sendable {
    case all
    case memory
    case stored
    case realtime
    case checkpoint
    case video
    case chat
    case tool

    /// Text shown on the filter chip.
    var label: String {
        switch self {
        case .all:
            return "All"
        case .memory:
            return "Memory"
        case .stored:
            return "Stored"
        case .realtime:
            return "Realtime"
        case .checkpoint:
            return "Checkpoint"
        case .video:
            return "Video"
        case .chat:
            return "Chat"
        case .tool:
            return "Tool"
        }
    }

    /// Whether a chunk of the given kind passes this filter.
    ///
    /// `.all` accepts every kind. `.memoryCheckpoint` is accepted by both
    /// `.memory` and `.checkpoint`. Any pairing not listed is rejected.
    func contains(_ kind: InjectionKind) -> Bool {
        switch (self, kind) {
        case (.all, _):
            return true
        case (.memory, .memoryCheckpoint), (.memory, .dailyDigest):
            return true
        case (.stored, .storedMemory), (.stored, .quickCapture):
            return true
        case (.realtime, .realtimeCapture):
            return true
        case (.checkpoint, .checkpoint), (.checkpoint, .memoryCheckpoint):
            return true
        case (.video, .videoKnowledge):
            return true
        case (.chat, .chat):
            return true
        case (.tool, .toolSession):
            return true
        default:
            return false
        }
    }
}

struct InjectionEvent: Equatable, Identifiable, Sendable {
let id: Int64
let sessionID: String
let timestamp: String
let query: String
let chunkIDs: [String]
let tokenCount: Int
let chunks: [InjectionChunk]

/// Number of chunk IDs recorded on the event (independent of how many
/// chunk details are present in `chunks`).
var chunkCount: Int {
    return chunkIDs.count
}

/// The chunk detail whose id equals the first entry of `chunkIDs`, or `nil`
/// when no such detail is present. When `chunkIDs` is empty, the first
/// element of `chunks` (if any) is used instead.
var primaryChunk: InjectionChunk? {
    if let leadingID = chunkIDs.first {
        return chunks.first(where: { $0.id == leadingID })
    }
    return chunks.first
}

/// Kind of `primaryChunk`, or `.other` when there is no primary chunk.
var primaryKind: InjectionKind {
    guard let lead = primaryChunk else {
        return .other
    }
    return lead.kind
}

/// Kinds of every chunk in `chunks`, in order; `[.other]` when `chunks` is empty.
var allKinds: [InjectionKind] {
    if chunks.isEmpty {
        return [.other]
    }
    return chunks.map { $0.kind }
}

/// Whether this event should be shown under `typeFilter`.
///
/// An event that lists chunk IDs but has no chunk details passes every
/// filter other than `.all` unconditionally; in all remaining cases the
/// event matches when at least one of `allKinds` is accepted by the filter.
func matches(typeFilter: InjectionTypeFilter) -> Bool {
    let detailsMissing = chunks.isEmpty && !chunkIDs.isEmpty
    if detailsMissing && typeFilter != .all {
        return true
    }
    for kind in allKinds where typeFilter.contains(kind) {
        return true
    }
    return false
}

/// Title for the event row: the primary chunk's display text when it is
/// non-empty, otherwise the query elided to 80 characters.
var displayTitle: String {
    guard let chunkText = primaryChunk?.displayText, !chunkText.isEmpty else {
        return InjectionChunk.elide(query, limit: 80)
    }
    return chunkText
}
Comment thread
cursor[bot] marked this conversation as resolved.

/// "Triggered by: " followed by the query elided to 96 characters.
var triggeredByText: String {
    let shortenedQuery = InjectionChunk.elide(query, limit: 96)
    return "Triggered by: " + shortenedQuery
}

/// Modal title derived from the event's primary kind.
var modalTitle: String {
    let kind = primaryKind
    return kind.modalTitle
}

/// Modal title for the chunk with the given id: that chunk's kind title when
/// the chunk is present in `chunks`, otherwise "Conversation".
func openingModalTitle(forChunkID chunkID: String) -> String {
    guard let match = chunks.first(where: { $0.id == chunkID }) else {
        return "Conversation"
    }
    return match.kind.modalTitle
}

/// One-line summary: the query, the chunk count and the token count,
/// separated by " • ".
var summaryLine: String {
    let parts = [query, "\(chunkCount) chunks", "\(tokenCount) tok"]
    return parts.joined(separator: " • ")
}
Expand All @@ -20,14 +271,16 @@ struct InjectionEvent: Equatable, Identifiable, Sendable {
timestamp: String,
query: String,
chunkIDs: [String],
tokenCount: Int
tokenCount: Int,
chunks: [InjectionChunk] = []
) {
self.id = id
self.sessionID = sessionID
self.timestamp = timestamp
self.query = query
self.chunkIDs = chunkIDs
self.tokenCount = tokenCount
self.chunks = chunks
}

init(row: [String: Any]) throws {
Expand All @@ -47,10 +300,16 @@ struct InjectionEvent: Equatable, Identifiable, Sendable {
self.chunkIDs = rawChunkIDs
} else if let text = row["chunk_ids"] as? String,
let data = text.data(using: .utf8),
let decoded = try JSONSerialization.jsonObject(with: data) as? [String] {
let decoded = try? JSONSerialization.jsonObject(with: data) as? [String] {
self.chunkIDs = decoded
} else {
self.chunkIDs = []
}

if let rawChunks = row["chunks"] as? [[String: Any]] {
chunks = rawChunks.map(InjectionChunk.init(row:))
} else {
chunks = []
}
}
}
Loading
Loading