From e2f9057755374b155b7595e584927dde3bd231fd Mon Sep 17 00:00:00 2001 From: Etan Joseph Heyman Date: Tue, 26 May 2026 19:33:30 +0300 Subject: [PATCH 1/2] feat(brainbar): aggregate entity aliases in graph --- .../Sources/BrainBar/BrainDatabase.swift | 164 +++++++++++++++--- .../BrainBar/KnowledgeGraph/KGNode.swift | 10 +- .../BrainBar/KnowledgeGraph/KGViewModel.swift | 1 + .../BrainBarTests/KnowledgeGraphTests.swift | 125 +++++++++++++ 4 files changed, 274 insertions(+), 26 deletions(-) diff --git a/brain-bar/Sources/BrainBar/BrainDatabase.swift b/brain-bar/Sources/BrainBar/BrainDatabase.swift index cefc2261..69e38fd1 100644 --- a/brain-bar/Sources/BrainBar/BrainDatabase.swift +++ b/brain-bar/Sources/BrainBar/BrainDatabase.swift @@ -3552,6 +3552,23 @@ final class BrainDatabase: @unchecked Sendable { let entityType: String let description: String? let importance: Double + let linkedChunkCount: Int + + init( + id: String, + name: String, + entityType: String, + description: String?, + importance: Double, + linkedChunkCount: Int = 0 + ) { + self.id = id + self.name = name + self.entityType = entityType + self.description = description + self.importance = importance + self.linkedChunkCount = linkedChunkCount + } } struct KGRelationRow: Equatable, Sendable { @@ -3661,12 +3678,14 @@ final class BrainDatabase: @unchecked Sendable { var rows: [KGEntityRow] = [] while sqlite3_step(stmt) == SQLITE_ROW { + let id = columnText(stmt, 0) ?? "" rows.append(KGEntityRow( - id: columnText(stmt, 0) ?? "", + id: id, name: columnText(stmt, 1) ?? "", entityType: columnText(stmt, 2) ?? "", description: columnText(stmt, 3), - importance: sqlite3_column_double(stmt, 4) + importance: sqlite3_column_double(stmt, 4), + linkedChunkCount: try fetchEntityChunkCount(entityId: id) )) } return rows @@ -3733,52 +3752,62 @@ final class BrainDatabase: @unchecked Sendable { } func fetchEntityChunkCount(entityId: String) throws -> Int { + let entityIds = try entityAliasGroupIDs(entityId: entityId) let sql = """ - SELECT COUNT(*) + SELECT COUNT(DISTINCT ec.chunk_id) FROM kg_entity_chunks ec JOIN chunks c ON c.id = ec.chunk_id - WHERE ec.entity_id = ? + WHERE ec.entity_id IN (\(placeholders(count: entityIds.count))) """ - return try fetchCount(sql: sql, entityId: entityId) + return try fetchCount(sql: sql, entityIds: entityIds) } func fetchEntitySourceFileCount(entityId: String) throws -> Int { guard let db else { throw DBError.notOpen } + let entityIds = try entityAliasGroupIDs(entityId: entityId) let sql = """ SELECT COUNT(DISTINCT c.source_file) FROM kg_entity_chunks ec JOIN chunks c ON c.id = ec.chunk_id - WHERE ec.entity_id = ? + WHERE ec.entity_id IN (\(placeholders(count: entityIds.count))) AND NULLIF(c.source_file, '') IS NOT NULL """ - return try fetchCount(sql: sql, entityId: entityId, db: db) + return try fetchCount(sql: sql, entityIds: entityIds, db: db) } func fetchEntityChunksPage(entityId: String, after: ChunkCursor?, limit: Int) throws -> ChunkPage { guard let db else { throw DBError.notOpen } let pageLimit = max(0, limit) guard pageLimit > 0 else { return ChunkPage(rows: [], nextCursor: nil) } + let entityIds = try entityAliasGroupIDs(entityId: entityId) let cursorPredicate: String if after == nil { cursorPredicate = "" } else { cursorPredicate = """ - AND ( - ec.relevance < ? - OR (ec.relevance = ? AND COALESCE(c.created_at, '') < ?) - OR (ec.relevance = ? AND COALESCE(c.created_at, '') = ? AND c.id < ?) + WHERE ( + relevance < ? + OR (relevance = ? AND created_at < ?) + OR (relevance = ? AND created_at = ? AND chunk_id < ?) ) """ } let sql = """ - SELECT c.id, COALESCE(NULLIF(c.summary, ''), substr(c.content, 1, 200)) AS snippet, - c.importance, ec.relevance, COALESCE(c.created_at, '') AS created_at - FROM kg_entity_chunks ec - JOIN chunks c ON c.id = ec.chunk_id - WHERE ec.entity_id = ? + SELECT chunk_id, snippet, importance, relevance, created_at + FROM ( + SELECT c.id AS chunk_id, + COALESCE(NULLIF(c.summary, ''), substr(c.content, 1, 200)) AS snippet, + c.importance AS importance, + MAX(ec.relevance) AS relevance, + COALESCE(c.created_at, '') AS created_at + FROM kg_entity_chunks ec + JOIN chunks c ON c.id = ec.chunk_id + WHERE ec.entity_id IN (\(placeholders(count: entityIds.count))) + GROUP BY c.id + ) \(cursorPredicate) - ORDER BY ec.relevance DESC, COALESCE(c.created_at, '') DESC, c.id DESC + ORDER BY relevance DESC, created_at DESC, chunk_id DESC LIMIT ? """ var stmt: OpaquePointer? @@ -3786,8 +3815,7 @@ final class BrainDatabase: @unchecked Sendable { throw DBError.prepare(sqlite3_errcode(db)) } defer { sqlite3_finalize(stmt) } - bindText(entityId, to: stmt, index: 1) - var bindIndex: Int32 = 2 + var bindIndex = bindEntityIDs(entityIds, to: stmt, startingAt: 1) if let after { sqlite3_bind_double(stmt, bindIndex, after.relevance) bindIndex += 1 @@ -3832,6 +3860,7 @@ final class BrainDatabase: @unchecked Sendable { guard let db else { throw DBError.notOpen } let pageLimit = max(0, limit) guard pageLimit > 0 else { return SourceFilePage(rows: [], nextCursor: nil) } + let entityIds = try entityAliasGroupIDs(entityId: entityId) let cursorPredicate: String if after == nil { @@ -3849,11 +3878,11 @@ final class BrainDatabase: @unchecked Sendable { SELECT source_file, chunk_count, top_relevance FROM ( SELECT c.source_file AS source_file, - COUNT(*) AS chunk_count, + COUNT(DISTINCT c.id) AS chunk_count, MAX(ec.relevance) AS top_relevance FROM kg_entity_chunks ec JOIN chunks c ON c.id = ec.chunk_id - WHERE ec.entity_id = ? + WHERE ec.entity_id IN (\(placeholders(count: entityIds.count))) AND NULLIF(c.source_file, '') IS NOT NULL GROUP BY c.source_file ) @@ -3866,8 +3895,7 @@ final class BrainDatabase: @unchecked Sendable { throw DBError.prepare(sqlite3_errcode(db)) } defer { sqlite3_finalize(stmt) } - bindText(entityId, to: stmt, index: 1) - var bindIndex: Int32 = 2 + var bindIndex = bindEntityIDs(entityIds, to: stmt, startingAt: 1) if let after { sqlite3_bind_double(stmt, bindIndex, after.topRelevance) bindIndex += 1 @@ -3920,13 +3948,17 @@ final class BrainDatabase: @unchecked Sendable { } private func fetchCount(sql: String, entityId: String, db providedDB: OpaquePointer? = nil) throws -> Int { + try fetchCount(sql: sql, entityIds: [entityId], db: providedDB) + } + + private func fetchCount(sql: String, entityIds: [String], db providedDB: OpaquePointer? = nil) throws -> Int { guard let db = providedDB ?? self.db else { throw DBError.notOpen } var stmt: OpaquePointer? guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { throw DBError.prepare(sqlite3_errcode(db)) } defer { sqlite3_finalize(stmt) } - bindText(entityId, to: stmt, index: 1) + _ = bindEntityIDs(entityIds, to: stmt, startingAt: 1) let stepRC = sqlite3_step(stmt) guard stepRC == SQLITE_ROW else { throw DBError.step(stepRC) @@ -3934,6 +3966,90 @@ final class BrainDatabase: @unchecked Sendable { return Int(sqlite3_column_int(stmt, 0)) } + private func entityAliasGroupIDs(entityId: String) throws -> [String] { + guard let db else { throw DBError.notOpen } + let entitySQL = "SELECT name, entity_type FROM kg_entities WHERE id = ?" + var entityStmt: OpaquePointer? + guard sqlite3_prepare_v2(db, entitySQL, -1, &entityStmt, nil) == SQLITE_OK else { + throw DBError.prepare(sqlite3_errcode(db)) + } + defer { sqlite3_finalize(entityStmt) } + bindText(entityId, to: entityStmt, index: 1) + guard sqlite3_step(entityStmt) == SQLITE_ROW else { + return [entityId] + } + let selectedName = columnText(entityStmt, 0) ?? "" + let selectedType = columnText(entityStmt, 1) ?? "" + + var orderedIds = [entityId] + var seen = Set(orderedIds) + func append(_ id: String) { + guard !id.isEmpty, !seen.contains(id) else { return } + seen.insert(id) + orderedIds.append(id) + } + + let canonicalSQL = """ + SELECT DISTINCT a.entity_id + FROM kg_entity_aliases a + JOIN kg_entities canonical ON canonical.id = a.entity_id + WHERE lower(a.alias) = lower(?) + AND canonical.entity_type = ? + ORDER BY a.entity_id + """ + var canonicalStmt: OpaquePointer? + guard sqlite3_prepare_v2(db, canonicalSQL, -1, &canonicalStmt, nil) == SQLITE_OK else { + throw DBError.prepare(sqlite3_errcode(db)) + } + defer { sqlite3_finalize(canonicalStmt) } + bindText(selectedName, to: canonicalStmt, index: 1) + bindText(selectedType, to: canonicalStmt, index: 2) + var canonicalMatches: [String] = [] + while sqlite3_step(canonicalStmt) == SQLITE_ROW { + if let id = columnText(canonicalStmt, 0), !id.isEmpty { + canonicalMatches.append(id) + } + } + if canonicalMatches.count == 1 { + append(canonicalMatches[0]) + } + + let aliasEntitySQL = """ + SELECT DISTINCT aliasEntity.id + FROM kg_entity_aliases a + JOIN kg_entities canonical ON canonical.id = a.entity_id + JOIN kg_entities aliasEntity ON lower(aliasEntity.name) = lower(a.alias) + WHERE a.entity_id IN (\(placeholders(count: orderedIds.count))) + AND aliasEntity.entity_type = canonical.entity_type + ORDER BY aliasEntity.id + """ + var aliasStmt: OpaquePointer? + guard sqlite3_prepare_v2(db, aliasEntitySQL, -1, &aliasStmt, nil) == SQLITE_OK else { + throw DBError.prepare(sqlite3_errcode(db)) + } + defer { sqlite3_finalize(aliasStmt) } + _ = bindEntityIDs(orderedIds, to: aliasStmt, startingAt: 1) + while sqlite3_step(aliasStmt) == SQLITE_ROW { + append(columnText(aliasStmt, 0) ?? "") + } + + return orderedIds + } + + private func placeholders(count: Int) -> String { + Array(repeating: "?", count: max(1, count)).joined(separator: ", ") + } + + @discardableResult + private func bindEntityIDs(_ entityIds: [String], to stmt: OpaquePointer?, startingAt index: Int32) -> Int32 { + var bindIndex = index + for entityId in entityIds { + bindText(entityId, to: stmt, index: bindIndex) + bindIndex += 1 + } + return bindIndex + } + func getChunk(id: String) throws -> [String: Any]? { guard let db else { throw DBError.notOpen } let sql = """ diff --git a/brain-bar/Sources/BrainBar/KnowledgeGraph/KGNode.swift b/brain-bar/Sources/BrainBar/KnowledgeGraph/KGNode.swift index 28f5313a..608f627b 100644 --- a/brain-bar/Sources/BrainBar/KnowledgeGraph/KGNode.swift +++ b/brain-bar/Sources/BrainBar/KnowledgeGraph/KGNode.swift @@ -5,13 +5,17 @@ struct KGNode: Identifiable, Equatable, Sendable { let name: String let entityType: String let importance: Double + let linkedChunkCount: Int var position: CGPoint var velocity: CGVector - /// Radius scales with importance (min 8, max 28) + /// Radius scales with importance and linked evidence density. var radius: CGFloat { - CGFloat(8 + (importance / 10.0) * 20) + let clampedImportance = max(0, min(10, importance)) + let importanceBoost = CGFloat((clampedImportance / 10.0) * 20) + let evidenceBoost = min(max(CGFloat(linkedChunkCount), 0) / 10.0, 8) + return 8 + importanceBoost + evidenceBoost } var color: Color { @@ -33,6 +37,7 @@ struct KGNode: Identifiable, Equatable, Sendable { name: String, entityType: String, importance: Double, + linkedChunkCount: Int = 0, position: CGPoint? = nil, velocity: CGVector = .zero ) { @@ -40,6 +45,7 @@ struct KGNode: Identifiable, Equatable, Sendable { self.name = name self.entityType = entityType self.importance = importance + self.linkedChunkCount = linkedChunkCount self.position = position ?? CGPoint( x: CGFloat.random(in: 100...500), y: CGFloat.random(in: 100...400) diff --git a/brain-bar/Sources/BrainBar/KnowledgeGraph/KGViewModel.swift b/brain-bar/Sources/BrainBar/KnowledgeGraph/KGViewModel.swift index 7cbd06e6..27728800 100644 --- a/brain-bar/Sources/BrainBar/KnowledgeGraph/KGViewModel.swift +++ b/brain-bar/Sources/BrainBar/KnowledgeGraph/KGViewModel.swift @@ -179,6 +179,7 @@ final class KGViewModel: ObservableObject { name: row.name, entityType: row.entityType, importance: row.importance, + linkedChunkCount: row.linkedChunkCount, position: existingNode?.position, velocity: existingNode?.velocity ?? .zero ) diff --git a/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift b/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift index 22c6a559..3f9ab918 100644 --- a/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift +++ b/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift @@ -125,6 +125,35 @@ final class KGDatabaseTests: XCTestCase { XCTAssertEqual(entities.last?.importance, 2.0) } + func testFetchKGEntitiesAggregatesAliasGroupChunkCount() throws { + try db.insertEntity(id: "person-etan-heyman", type: "person", name: "Etan Heyman") + try db.insertEntity(id: "person-etan", type: "person", name: "Etan") + try db.insertEntity(id: "project-brainlayer", type: "project", name: "BrainLayer") + try db.insertRelation(sourceId: "person-etan-heyman", targetId: "project-brainlayer", relationType: "builds") + try insertAlias(alias: "Etan", entityId: "person-etan-heyman") + try insertLinkedChunk( + id: "canonical-chunk", + entityId: "person-etan-heyman", + content: "Canonical Etan chunk", + sourceFile: "/tmp/canonical.md", + createdAt: "2026-05-24T12:00:00Z", + relevance: 0.9 + ) + try insertLinkedChunk( + id: "alias-chunk", + entityId: "person-etan", + content: "Alias Etan chunk", + sourceFile: "/tmp/alias.md", + createdAt: "2026-05-24T12:01:00Z", + relevance: 0.8 + ) + + let entities = try db.fetchKGEntities() + let etan = try XCTUnwrap(entities.first { $0.id == "person-etan-heyman" }) + + XCTAssertEqual(etan.linkedChunkCount, 2) + } + func testFetchKGEntitiesEmptyDB() throws { let entities = try db.fetchKGEntities() XCTAssertTrue(entities.isEmpty) @@ -365,6 +394,73 @@ final class KGDatabaseTests: XCTestCase { XCTAssertNil(page.nextCursor) } + func testFetchEntitySidebarCountsAndPagesAggregateAliasGroup() throws { + try db.insertEntity(id: "person-etan-heyman", type: "person", name: "Etan Heyman") + try db.insertEntity(id: "person-etan", type: "person", name: "Etan") + try insertAlias(alias: "Etan", entityId: "person-etan-heyman") + try insertLinkedChunk( + id: "canonical", + entityId: "person-etan-heyman", + content: "Canonical chunk", + sourceFile: "/tmp/canonical.md", + createdAt: "2026-05-24T12:00:00Z", + relevance: 0.9 + ) + try insertLinkedChunk( + id: "alias", + entityId: "person-etan", + content: "Alias chunk", + sourceFile: "/tmp/alias.md", + createdAt: "2026-05-24T12:01:00Z", + relevance: 0.8 + ) + + let page = try db.fetchEntityChunksPage(entityId: "person-etan-heyman", after: nil, limit: 10) + let files = try db.fetchEntitySourceFiles(entityId: "person-etan-heyman", limit: 10, after: nil) + + XCTAssertEqual(try db.fetchEntityChunkCount(entityId: "person-etan-heyman"), 2) + XCTAssertEqual(try db.fetchEntitySourceFileCount(entityId: "person-etan-heyman"), 2) + XCTAssertEqual(page.rows.map(\.chunkID), ["canonical", "alias"]) + XCTAssertEqual(files.rows.map(\.sourceFile), ["/tmp/canonical.md", "/tmp/alias.md"]) + } + + func testAliasAggregationDoesNotMergeAmbiguousAliasSurfaces() throws { + try db.insertEntity(id: "person-alex", type: "person", name: "Alex") + try db.insertEntity(id: "person-alex-one", type: "person", name: "Alex One") + try db.insertEntity(id: "person-alex-two", type: "person", name: "Alex Two") + try insertAlias(alias: "Alex", entityId: "person-alex-one") + try insertAlias(alias: "Alex", entityId: "person-alex-two") + try insertLinkedChunk( + id: "alias-only", + entityId: "person-alex", + content: "Alias row chunk", + sourceFile: "/tmp/alex.md", + createdAt: "2026-05-24T12:00:00Z", + relevance: 0.9 + ) + try insertLinkedChunk( + id: "canonical-one", + entityId: "person-alex-one", + content: "First canonical chunk", + sourceFile: "/tmp/one.md", + createdAt: "2026-05-24T12:01:00Z", + relevance: 0.8 + ) + try insertLinkedChunk( + id: "canonical-two", + entityId: "person-alex-two", + content: "Second canonical chunk", + sourceFile: "/tmp/two.md", + createdAt: "2026-05-24T12:02:00Z", + relevance: 0.7 + ) + + let page = try db.fetchEntityChunksPage(entityId: "person-alex", after: nil, limit: 10) + + XCTAssertEqual(try db.fetchEntityChunkCount(entityId: "person-alex"), 1) + XCTAssertEqual(page.rows.map(\.chunkID), ["alias-only"]) + } + func testFetchEntitySourceFileCountReturnsDistinctFiles() throws { try db.insertEntity(id: "e1", type: "person", name: "Alice") for i in 0..<5 { @@ -564,6 +660,21 @@ final class KGDatabaseTests: XCTestCase { try db.linkEntityChunk(entityId: entityId, chunkId: id, relevance: relevance) } + private func insertAlias(alias: String, entityId: String) throws { + guard let handle = db.dbHandle else { + XCTFail("Expected database handle") + return + } + let sql = "INSERT INTO kg_entity_aliases (alias, entity_id) VALUES (?, ?)" + var stmt: OpaquePointer? + XCTAssertEqual(sqlite3_prepare_v2(handle, sql, -1, &stmt, nil), SQLITE_OK) + defer { sqlite3_finalize(stmt) } + let transient = unsafeBitCast(-1, to: sqlite3_destructor_type.self) + sqlite3_bind_text(stmt, 1, alias, -1, transient) + sqlite3_bind_text(stmt, 2, entityId, -1, transient) + XCTAssertEqual(sqlite3_step(stmt), SQLITE_DONE) + } + private func updateChunk(id: String, sourceFile: String, createdAt: String) throws { guard let handle = db.dbHandle else { XCTFail("Expected database handle") @@ -664,6 +775,20 @@ final class KGModelTests: XCTestCase { XCTAssertGreaterThan(high.radius, low.radius) } + func testKGNodeRadiusScalesWithAliasAggregatedChunkCount() { + let sparse = KGNode(id: "sparse", name: "Sparse", entityType: "person", importance: 5.0, linkedChunkCount: 0) + let dense = KGNode(id: "dense", name: "Dense", entityType: "person", importance: 5.0, linkedChunkCount: 100) + + XCTAssertGreaterThan(dense.radius, sparse.radius) + } + + func testKGNodeRadiusIgnoresNegativeLinkedChunkCount() { + let zero = KGNode(id: "zero", name: "Zero", entityType: "person", importance: 5.0, linkedChunkCount: 0) + let negative = KGNode(id: "negative", name: "Negative", entityType: "person", importance: 5.0, linkedChunkCount: -10) + + XCTAssertEqual(negative.radius, zero.radius) + } + func testKGEdgeProperties() { let edge = KGEdge(sourceId: "a", targetId: "b", relationType: "builds") XCTAssertEqual(edge.sourceId, "a") From 4727818d3d7df855ded0e191147df0988de53346 Mon Sep 17 00:00:00 2001 From: Etan Joseph Heyman Date: Tue, 26 May 2026 19:49:19 +0300 Subject: [PATCH 2/2] fix(brainbar): tighten alias aggregation --- .../Sources/BrainBar/BrainDatabase.swift | 142 +++++++++++++----- .../BrainBarTests/KnowledgeGraphTests.swift | 27 ++++ 2 files changed, 132 insertions(+), 37 deletions(-) diff --git a/brain-bar/Sources/BrainBar/BrainDatabase.swift b/brain-bar/Sources/BrainBar/BrainDatabase.swift index 69e38fd1..2a35eeea 100644 --- a/brain-bar/Sources/BrainBar/BrainDatabase.swift +++ b/brain-bar/Sources/BrainBar/BrainDatabase.swift @@ -3571,6 +3571,12 @@ final class BrainDatabase: @unchecked Sendable { } } + private struct KGEntityAliasSeed { + let id: String + let name: String + let entityType: String + } + struct KGRelationRow: Equatable, Sendable { let id: String let sourceId: String @@ -3676,17 +3682,28 @@ final class BrainDatabase: @unchecked Sendable { defer { sqlite3_finalize(stmt) } sqlite3_bind_int(stmt, 1, Int32(limit)) - var rows: [KGEntityRow] = [] + var seeds: [KGEntityAliasSeed] = [] + var descriptions: [String: String?] = [:] + var importances: [String: Double] = [:] while sqlite3_step(stmt) == SQLITE_ROW { let id = columnText(stmt, 0) ?? "" - rows.append(KGEntityRow( - id: id, - name: columnText(stmt, 1) ?? "", - entityType: columnText(stmt, 2) ?? "", - description: columnText(stmt, 3), - importance: sqlite3_column_double(stmt, 4), - linkedChunkCount: try fetchEntityChunkCount(entityId: id) - )) + let name = columnText(stmt, 1) ?? "" + let entityType = columnText(stmt, 2) ?? "" + seeds.append(KGEntityAliasSeed(id: id, name: name, entityType: entityType)) + descriptions[id] = columnText(stmt, 3) + importances[id] = sqlite3_column_double(stmt, 4) + } + + let linkedChunkCounts = try fetchLinkedChunkCounts(for: seeds) + let rows = seeds.map { seed in + KGEntityRow( + id: seed.id, + name: seed.name, + entityType: seed.entityType, + description: descriptions[seed.id] ?? nil, + importance: importances[seed.id] ?? 5.0, + linkedChunkCount: linkedChunkCounts[seed.id, default: 0] + ) } return rows } @@ -3978,9 +3995,6 @@ final class BrainDatabase: @unchecked Sendable { guard sqlite3_step(entityStmt) == SQLITE_ROW else { return [entityId] } - let selectedName = columnText(entityStmt, 0) ?? "" - let selectedType = columnText(entityStmt, 1) ?? "" - var orderedIds = [entityId] var seen = Set(orderedIds) func append(_ id: String) { @@ -3989,31 +4003,6 @@ final class BrainDatabase: @unchecked Sendable { orderedIds.append(id) } - let canonicalSQL = """ - SELECT DISTINCT a.entity_id - FROM kg_entity_aliases a - JOIN kg_entities canonical ON canonical.id = a.entity_id - WHERE lower(a.alias) = lower(?) - AND canonical.entity_type = ? - ORDER BY a.entity_id - """ - var canonicalStmt: OpaquePointer? - guard sqlite3_prepare_v2(db, canonicalSQL, -1, &canonicalStmt, nil) == SQLITE_OK else { - throw DBError.prepare(sqlite3_errcode(db)) - } - defer { sqlite3_finalize(canonicalStmt) } - bindText(selectedName, to: canonicalStmt, index: 1) - bindText(selectedType, to: canonicalStmt, index: 2) - var canonicalMatches: [String] = [] - while sqlite3_step(canonicalStmt) == SQLITE_ROW { - if let id = columnText(canonicalStmt, 0), !id.isEmpty { - canonicalMatches.append(id) - } - } - if canonicalMatches.count == 1 { - append(canonicalMatches[0]) - } - let aliasEntitySQL = """ SELECT DISTINCT aliasEntity.id FROM kg_entity_aliases a @@ -4040,6 +4029,85 @@ final class BrainDatabase: @unchecked Sendable { Array(repeating: "?", count: max(1, count)).joined(separator: ", ") } + private func fetchLinkedChunkCounts(for entities: [KGEntityAliasSeed]) throws -> [String: Int] { + guard let db else { throw DBError.notOpen } + guard !entities.isEmpty else { return [:] } + + let visibleIds = entities.map(\.id) + var groupMembers = Dictionary(uniqueKeysWithValues: entities.map { ($0.id, [$0.id]) }) + let canonicalIds = visibleIds + let aliasMembers = try aliasEntityIDsByCanonicalID(canonicalIds: canonicalIds, db: db) + for entity in entities { + if let aliases = aliasMembers[entity.id] { + groupMembers[entity.id, default: [entity.id]].append(contentsOf: aliases) + } + } + + for (groupId, members) in groupMembers { + var seen: Set = [] + groupMembers[groupId] = members.filter { seen.insert($0).inserted } + } + return try fetchDistinctChunkCounts(groupMembers: groupMembers, db: db) + } + + private func aliasEntityIDsByCanonicalID(canonicalIds: [String], db: OpaquePointer) throws -> [String: [String]] { + guard !canonicalIds.isEmpty else { return [:] } + let sql = """ + SELECT a.entity_id, aliasEntity.id + FROM kg_entity_aliases a + JOIN kg_entities canonical ON canonical.id = a.entity_id + JOIN kg_entities aliasEntity ON lower(aliasEntity.name) = lower(a.alias) + WHERE a.entity_id IN (\(placeholders(count: canonicalIds.count))) + AND aliasEntity.entity_type = canonical.entity_type + ORDER BY a.entity_id, aliasEntity.id + """ + var stmt: OpaquePointer? + guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { + throw DBError.prepare(sqlite3_errcode(db)) + } + defer { sqlite3_finalize(stmt) } + _ = bindEntityIDs(canonicalIds, to: stmt, startingAt: 1) + var aliases: [String: [String]] = [:] + while sqlite3_step(stmt) == SQLITE_ROW { + let canonicalId = columnText(stmt, 0) ?? "" + let aliasId = columnText(stmt, 1) ?? "" + guard !canonicalId.isEmpty, !aliasId.isEmpty else { continue } + aliases[canonicalId, default: []].append(aliasId) + } + return aliases + } + + private func fetchDistinctChunkCounts(groupMembers: [String: [String]], db: OpaquePointer) throws -> [String: Int] { + let pairs = groupMembers.flatMap { groupId, entityIds in entityIds.map { (groupId, $0) } } + guard !pairs.isEmpty else { return [:] } + let values = Array(repeating: "(?, ?)", count: pairs.count).joined(separator: ", ") + let sql = """ + WITH group_members(group_id, entity_id) AS (VALUES \(values)) + SELECT gm.group_id, COUNT(DISTINCT ec.chunk_id) + FROM group_members gm + JOIN kg_entity_chunks ec ON ec.entity_id = gm.entity_id + JOIN chunks c ON c.id = ec.chunk_id + GROUP BY gm.group_id + """ + var stmt: OpaquePointer? + guard sqlite3_prepare_v2(db, sql, -1, &stmt, nil) == SQLITE_OK else { + throw DBError.prepare(sqlite3_errcode(db)) + } + defer { sqlite3_finalize(stmt) } + var bindIndex: Int32 = 1 + for (groupId, entityId) in pairs { + bindText(groupId, to: stmt, index: bindIndex) + bindIndex += 1 + bindText(entityId, to: stmt, index: bindIndex) + bindIndex += 1 + } + var counts: [String: Int] = [:] + while sqlite3_step(stmt) == SQLITE_ROW { + counts[columnText(stmt, 0) ?? ""] = Int(sqlite3_column_int(stmt, 1)) + } + return counts + } + @discardableResult private func bindEntityIDs(_ entityIds: [String], to stmt: OpaquePointer?, startingAt index: Int32) -> Int32 { var bindIndex = index diff --git a/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift b/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift index 3f9ab918..263e354f 100644 --- a/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift +++ b/brain-bar/Tests/BrainBarTests/KnowledgeGraphTests.swift @@ -461,6 +461,33 @@ final class KGDatabaseTests: XCTestCase { XCTAssertEqual(page.rows.map(\.chunkID), ["alias-only"]) } + func testCanonicalEntityDoesNotMergeIntoAnotherEntityUniqueAlias() throws { + try db.insertEntity(id: "person-alex", type: "person", name: "Alex") + try db.insertEntity(id: "person-alexander", type: "person", name: "Alexander") + try insertAlias(alias: "Alex", entityId: "person-alexander") + try insertLinkedChunk( + id: "alex-canonical", + entityId: "person-alex", + content: "Real Alex canonical chunk", + sourceFile: "/tmp/alex.md", + createdAt: "2026-05-24T12:00:00Z", + relevance: 0.9 + ) + try insertLinkedChunk( + id: "alexander-canonical", + entityId: "person-alexander", + content: "Alexander canonical chunk", + sourceFile: "/tmp/alexander.md", + createdAt: "2026-05-24T12:01:00Z", + relevance: 0.8 + ) + + let page = try db.fetchEntityChunksPage(entityId: "person-alex", after: nil, limit: 10) + + XCTAssertEqual(try db.fetchEntityChunkCount(entityId: "person-alex"), 1) + XCTAssertEqual(page.rows.map(\.chunkID), ["alex-canonical"]) + } + func testFetchEntitySourceFileCountReturnsDistinctFiles() throws { try db.insertEntity(id: "e1", type: "person", name: "Alice") for i in 0..<5 {